diff --git a/library/std/src/path.rs b/library/std/src/path.rs index 050c617f5649c..54cee9575db92 100644 --- a/library/std/src/path.rs +++ b/library/std/src/path.rs @@ -299,8 +299,8 @@ where //////////////////////////////////////////////////////////////////////////////// /// Says whether the first byte after the prefix is a separator. -fn has_physical_root(s: &[u8], prefix: Option>) -> bool { - let path = if let Some(p) = prefix { &s[p.len()..] } else { s }; +fn has_physical_root(s: &[u8], prefix: Option>, pre_prefix_len: usize) -> bool { + let path = if let Some(p) = prefix { &s[p.len() + pre_prefix_len..] } else { s }; !path.is_empty() && is_sep_byte(path[0]) } @@ -600,6 +600,9 @@ pub struct Components<'a> { // The prefix as it was originally parsed, if any prefix: Option>, + // The length of `./` in `//./` on Cygwin + pre_prefix_len: usize, + // true if path *physically* has a root separator; for most Windows // prefixes, it may have a "logical" root separator for the purposes of // normalization, e.g., \\server\share == \\server\share\. @@ -643,7 +646,7 @@ impl<'a> Components<'a> { // how long is the prefix, if any? #[inline] fn prefix_len(&self) -> usize { - self.prefix.as_ref().map(Prefix::len).unwrap_or(0) + self.prefix.as_ref().map(Prefix::len).unwrap_or(0) + self.pre_prefix_len } #[inline] @@ -989,7 +992,14 @@ impl FusedIterator for Components<'_> {} impl<'a> PartialEq for Components<'a> { #[inline] fn eq(&self, other: &Components<'a>) -> bool { - let Components { path: _, front: _, back: _, has_physical_root: _, prefix: _ } = self; + let Components { + path: _, + front: _, + back: _, + has_physical_root: _, + prefix: _, + pre_prefix_len: _, + } = self; // Fast path for exact matches, e.g. for hashmap lookups. // Don't explicitly compare the prefix or has_physical_root fields since they'll @@ -999,6 +1009,7 @@ impl<'a> PartialEq for Components<'a> { && self.back == State::Body && other.back == State::Body && self.prefix_verbatim() == other.prefix_verbatim() + && self.pre_prefix_len == other.pre_prefix_len { // possible future improvement: this could bail out earlier if there were a // reverse memcmp/bcmp comparing back to front @@ -1315,8 +1326,17 @@ impl PathBuf { need_sep = false } + let need_clear = if cfg!(target_os = "cygwin") { + // If path is absolute and its prefix is none, it is like `/foo`, + // and will be handled below. + path.prefix().is_some() + } else { + // On Unix: prefix is always None. + path.is_absolute() || path.prefix().is_some() + }; + // absolute `path` replaces `self` - if path.is_absolute() || path.prefix().is_some() { + if need_clear { self.inner.truncate(0); // verbatim paths need . and .. removed @@ -2862,11 +2882,15 @@ impl Path { /// [`CurDir`]: Component::CurDir #[stable(feature = "rust1", since = "1.0.0")] pub fn components(&self) -> Components<'_> { - let prefix = parse_prefix(self.as_os_str()); + let (pre_prefix_len, prefix) = match parse_prefix(self.as_os_str()) { + Some((pre_prefix_len, prefix)) => (pre_prefix_len, Some(prefix)), + None => (0, None), + }; Components { path: self.as_u8_slice(), prefix, - has_physical_root: has_physical_root(self.as_u8_slice(), prefix), + pre_prefix_len, + has_physical_root: has_physical_root(self.as_u8_slice(), prefix, pre_prefix_len), front: State::Prefix, back: State::Body, } @@ -3330,9 +3354,9 @@ impl Hash for Path { fn hash(&self, h: &mut H) { let bytes = self.as_u8_slice(); let (prefix_len, verbatim) = match parse_prefix(&self.inner) { - Some(prefix) => { + Some((pre_prefix_len, prefix)) => { prefix.hash(h); - (prefix.len(), prefix.is_verbatim()) + (prefix.len() + pre_prefix_len, prefix.is_verbatim()) } None => (0, false), }; @@ -3615,6 +3639,9 @@ impl Error for NormalizeError {} /// paths, this is currently equivalent to calling /// [`GetFullPathNameW`][windows-path]. /// +/// On Cygwin, this is currently equivalent to calling [`cygwin_conv_path`][cygwin-path] +/// with mode `CCP_WIN_A_TO_POSIX`. +/// /// Note that these [may change in the future][changes]. /// /// # Errors @@ -3667,11 +3694,36 @@ impl Error for NormalizeError {} /// # fn main() {} /// ``` /// +/// ## Cygwin paths +/// +/// ``` +/// # #[cfg(target_os = "cygwin")] +/// fn main() -> std::io::Result<()> { +/// use std::path::{self, Path}; +/// +/// // Relative to absolute +/// let absolute = path::absolute("foo/./bar")?; +/// assert!(absolute.ends_with(r"foo/bar")); +/// +/// // Windows absolute to absolute +/// let absolute = path::absolute(r"C:\foo//test\..\./bar.rs")?; +/// assert!(absolute.ends_with("/c/foo/bar.rs")); +/// +/// // POSIX absolute to absolute +/// let absolute = path::absolute("/foo//test/.././bar.rs")?; +/// assert_eq!(absolute, Path::new("/foo//test/.././bar.rs")); +/// Ok(()) +/// } +/// # #[cfg(not(target_os = "cygwin"))] +/// # fn main() {} +/// ``` +/// /// Note that this [may change in the future][changes]. /// /// [changes]: io#platform-specific-behavior /// [posix-semantics]: https://pubs.opengroup.org/onlinepubs/9699919799/basedefs/V1_chap04.html#tag_04_13 /// [windows-path]: https://docs.microsoft.com/en-us/windows/win32/api/fileapi/nf-fileapi-getfullpathnamew +/// [cygwin-path]: https://cygwin.com/cygwin-api/func-cygwin-conv-path.html #[stable(feature = "absolute_path", since = "1.79.0")] pub fn absolute>(path: P) -> io::Result { let path = path.as_ref(); diff --git a/library/std/src/sys/path/cygwin.rs b/library/std/src/sys/path/cygwin.rs new file mode 100644 index 0000000000000..43f32e46c37d4 --- /dev/null +++ b/library/std/src/sys/path/cygwin.rs @@ -0,0 +1,65 @@ +use crate::ffi::OsString; +use crate::os::unix::ffi::OsStringExt; +use crate::path::{Path, PathBuf}; +use crate::sys::common::small_c_string::run_path_with_cstr; +use crate::sys::cvt; +use crate::{io, ptr}; + +#[inline] +pub fn is_sep_byte(b: u8) -> bool { + b == b'/' || b == b'\\' +} + +/// Cygwin allways prefers `/` over `\`, and it always converts all `/` to `\` +/// internally when calling Win32 APIs. Therefore, the server component of path +/// `\\?\UNC\localhost/share` is `localhost/share` on Win32, but `localhost` +/// on Cygwin. +#[inline] +pub fn is_verbatim_sep(b: u8) -> bool { + b == b'/' || b == b'\\' +} + +pub use super::windows_prefix::parse_prefix; + +pub const MAIN_SEP_STR: &str = "/"; +pub const MAIN_SEP: char = '/'; + +unsafe extern "C" { + // Doc: https://cygwin.com/cygwin-api/func-cygwin-conv-path.html + // Src: https://github.com/cygwin/cygwin/blob/718a15ba50e0d01c79800bd658c2477f9a603540/winsup/cygwin/path.cc#L3902 + // Safety: + // * `what` should be `CCP_WIN_A_TO_POSIX` here + // * `from` is null-terminated UTF-8 path + // * `to` is buffer, the buffer size is `size`. + // + // Converts a path to an absolute POSIX path, no matter the input is Win32 path or POSIX path. + fn cygwin_conv_path( + what: libc::c_uint, + from: *const libc::c_char, + to: *mut u8, + size: libc::size_t, + ) -> libc::ssize_t; +} + +const CCP_WIN_A_TO_POSIX: libc::c_uint = 2; + +/// Make a POSIX path absolute. +pub(crate) fn absolute(path: &Path) -> io::Result { + run_path_with_cstr(path, &|path| { + let conv = CCP_WIN_A_TO_POSIX; + let size = cvt(unsafe { cygwin_conv_path(conv, path.as_ptr(), ptr::null_mut(), 0) })?; + // If success, size should not be 0. + debug_assert!(size >= 1); + let size = size as usize; + let mut buffer = Vec::with_capacity(size); + cvt(unsafe { cygwin_conv_path(conv, path.as_ptr(), buffer.as_mut_ptr(), size) })?; + unsafe { + buffer.set_len(size - 1); + } + Ok(PathBuf::from(OsString::from_vec(buffer))) + }) +} + +pub(crate) fn is_absolute(path: &Path) -> bool { + path.has_root() +} diff --git a/library/std/src/sys/path/mod.rs b/library/std/src/sys/path/mod.rs index 1fa4e80d6780c..a4ff4338cf5f4 100644 --- a/library/std/src/sys/path/mod.rs +++ b/library/std/src/sys/path/mod.rs @@ -1,6 +1,7 @@ cfg_if::cfg_if! { if #[cfg(target_os = "windows")] { mod windows; + mod windows_prefix; pub use windows::*; } else if #[cfg(all(target_vendor = "fortanix", target_env = "sgx"))] { mod sgx; @@ -11,6 +12,10 @@ cfg_if::cfg_if! { } else if #[cfg(target_os = "uefi")] { mod uefi; pub use uefi::*; + } else if #[cfg(target_os = "cygwin")] { + mod cygwin; + mod windows_prefix; + pub use cygwin::*; } else { mod unix; pub use unix::*; diff --git a/library/std/src/sys/path/sgx.rs b/library/std/src/sys/path/sgx.rs index 32c7752f605d9..b6c10a11fcef5 100644 --- a/library/std/src/sys/path/sgx.rs +++ b/library/std/src/sys/path/sgx.rs @@ -13,7 +13,7 @@ pub fn is_verbatim_sep(b: u8) -> bool { b == b'/' } -pub fn parse_prefix(_: &OsStr) -> Option> { +pub fn parse_prefix(_: &OsStr) -> Option<(usize, Prefix<'_>)> { None } diff --git a/library/std/src/sys/path/uefi.rs b/library/std/src/sys/path/uefi.rs index a3f4a3bfe1b67..d9d80cb17b75b 100644 --- a/library/std/src/sys/path/uefi.rs +++ b/library/std/src/sys/path/uefi.rs @@ -17,7 +17,7 @@ pub fn is_verbatim_sep(b: u8) -> bool { b == b'\\' } -pub fn parse_prefix(_: &OsStr) -> Option> { +pub fn parse_prefix(_: &OsStr) -> Option<(usize, Prefix<'_>)> { None } diff --git a/library/std/src/sys/path/unix.rs b/library/std/src/sys/path/unix.rs index faa2616a6320d..50e3ff626b137 100644 --- a/library/std/src/sys/path/unix.rs +++ b/library/std/src/sys/path/unix.rs @@ -13,7 +13,7 @@ pub fn is_verbatim_sep(b: u8) -> bool { } #[inline] -pub fn parse_prefix(_: &OsStr) -> Option> { +pub fn parse_prefix(_: &OsStr) -> Option<(usize, Prefix<'_>)> { None } diff --git a/library/std/src/sys/path/unsupported_backslash.rs b/library/std/src/sys/path/unsupported_backslash.rs index 30b06c132c98d..2f9deb229568f 100644 --- a/library/std/src/sys/path/unsupported_backslash.rs +++ b/library/std/src/sys/path/unsupported_backslash.rs @@ -14,7 +14,7 @@ pub fn is_verbatim_sep(b: u8) -> bool { b == b'\\' } -pub fn parse_prefix(_: &OsStr) -> Option> { +pub fn parse_prefix(_: &OsStr) -> Option<(usize, Prefix<'_>)> { None } diff --git a/library/std/src/sys/path/windows.rs b/library/std/src/sys/path/windows.rs index e0e003f6a8192..f124e1e5a71c7 100644 --- a/library/std/src/sys/path/windows.rs +++ b/library/std/src/sys/path/windows.rs @@ -1,5 +1,5 @@ use crate::ffi::{OsStr, OsString}; -use crate::path::{Path, PathBuf, Prefix}; +use crate::path::{Path, PathBuf}; use crate::sys::api::utf16; use crate::sys::pal::{c, fill_utf16_buf, os2path, to_u16s}; use crate::{io, ptr}; @@ -7,6 +7,8 @@ use crate::{io, ptr}; #[cfg(test)] mod tests; +pub use super::windows_prefix::parse_prefix; + pub const MAIN_SEP_STR: &str = "\\"; pub const MAIN_SEP: char = '\\'; @@ -77,177 +79,6 @@ pub(crate) fn append_suffix(path: PathBuf, suffix: &OsStr) -> PathBuf { path.into() } -struct PrefixParser<'a, const LEN: usize> { - path: &'a OsStr, - prefix: [u8; LEN], -} - -impl<'a, const LEN: usize> PrefixParser<'a, LEN> { - #[inline] - fn get_prefix(path: &OsStr) -> [u8; LEN] { - let mut prefix = [0; LEN]; - // SAFETY: Only ASCII characters are modified. - for (i, &ch) in path.as_encoded_bytes().iter().take(LEN).enumerate() { - prefix[i] = if ch == b'/' { b'\\' } else { ch }; - } - prefix - } - - fn new(path: &'a OsStr) -> Self { - Self { path, prefix: Self::get_prefix(path) } - } - - fn as_slice(&self) -> PrefixParserSlice<'a, '_> { - PrefixParserSlice { - path: self.path, - prefix: &self.prefix[..LEN.min(self.path.len())], - index: 0, - } - } -} - -struct PrefixParserSlice<'a, 'b> { - path: &'a OsStr, - prefix: &'b [u8], - index: usize, -} - -impl<'a> PrefixParserSlice<'a, '_> { - fn strip_prefix(&self, prefix: &str) -> Option { - self.prefix[self.index..] - .starts_with(prefix.as_bytes()) - .then_some(Self { index: self.index + prefix.len(), ..*self }) - } - - fn prefix_bytes(&self) -> &'a [u8] { - &self.path.as_encoded_bytes()[..self.index] - } - - fn finish(self) -> &'a OsStr { - // SAFETY: The unsafety here stems from converting between &OsStr and - // &[u8] and back. This is safe to do because (1) we only look at ASCII - // contents of the encoding and (2) new &OsStr values are produced only - // from ASCII-bounded slices of existing &OsStr values. - unsafe { OsStr::from_encoded_bytes_unchecked(&self.path.as_encoded_bytes()[self.index..]) } - } -} - -pub fn parse_prefix(path: &OsStr) -> Option> { - use Prefix::{DeviceNS, Disk, UNC, Verbatim, VerbatimDisk, VerbatimUNC}; - - let parser = PrefixParser::<8>::new(path); - let parser = parser.as_slice(); - if let Some(parser) = parser.strip_prefix(r"\\") { - // \\ - - // The meaning of verbatim paths can change when they use a different - // separator. - if let Some(parser) = parser.strip_prefix(r"?\") - && !parser.prefix_bytes().iter().any(|&x| x == b'/') - { - // \\?\ - if let Some(parser) = parser.strip_prefix(r"UNC\") { - // \\?\UNC\server\share - - let path = parser.finish(); - let (server, path) = parse_next_component(path, true); - let (share, _) = parse_next_component(path, true); - - Some(VerbatimUNC(server, share)) - } else { - let path = parser.finish(); - - // in verbatim paths only recognize an exact drive prefix - if let Some(drive) = parse_drive_exact(path) { - // \\?\C: - Some(VerbatimDisk(drive)) - } else { - // \\?\prefix - let (prefix, _) = parse_next_component(path, true); - Some(Verbatim(prefix)) - } - } - } else if let Some(parser) = parser.strip_prefix(r".\") { - // \\.\COM42 - let path = parser.finish(); - let (prefix, _) = parse_next_component(path, false); - Some(DeviceNS(prefix)) - } else { - let path = parser.finish(); - let (server, path) = parse_next_component(path, false); - let (share, _) = parse_next_component(path, false); - - if !server.is_empty() && !share.is_empty() { - // \\server\share - Some(UNC(server, share)) - } else { - // no valid prefix beginning with "\\" recognized - None - } - } - } else { - // If it has a drive like `C:` then it's a disk. - // Otherwise there is no prefix. - parse_drive(path).map(Disk) - } -} - -// Parses a drive prefix, e.g. "C:" and "C:\whatever" -fn parse_drive(path: &OsStr) -> Option { - // In most DOS systems, it is not possible to have more than 26 drive letters. - // See . - fn is_valid_drive_letter(drive: &u8) -> bool { - drive.is_ascii_alphabetic() - } - - match path.as_encoded_bytes() { - [drive, b':', ..] if is_valid_drive_letter(drive) => Some(drive.to_ascii_uppercase()), - _ => None, - } -} - -// Parses a drive prefix exactly, e.g. "C:" -fn parse_drive_exact(path: &OsStr) -> Option { - // only parse two bytes: the drive letter and the drive separator - if path.as_encoded_bytes().get(2).map(|&x| is_sep_byte(x)).unwrap_or(true) { - parse_drive(path) - } else { - None - } -} - -// Parse the next path component. -// -// Returns the next component and the rest of the path excluding the component and separator. -// Does not recognize `/` as a separator character if `verbatim` is true. -fn parse_next_component(path: &OsStr, verbatim: bool) -> (&OsStr, &OsStr) { - let separator = if verbatim { is_verbatim_sep } else { is_sep_byte }; - - match path.as_encoded_bytes().iter().position(|&x| separator(x)) { - Some(separator_start) => { - let separator_end = separator_start + 1; - - let component = &path.as_encoded_bytes()[..separator_start]; - - // Panic safe - // The max `separator_end` is `bytes.len()` and `bytes[bytes.len()..]` is a valid index. - let path = &path.as_encoded_bytes()[separator_end..]; - - // SAFETY: `path` is a valid wtf8 encoded slice and each of the separators ('/', '\') - // is encoded in a single byte, therefore `bytes[separator_start]` and - // `bytes[separator_end]` must be code point boundaries and thus - // `bytes[..separator_start]` and `bytes[separator_end..]` are valid wtf8 slices. - unsafe { - ( - OsStr::from_encoded_bytes_unchecked(component), - OsStr::from_encoded_bytes_unchecked(path), - ) - } - } - None => (path, OsStr::new("")), - } -} - /// Returns a UTF-16 encoded path capable of bypassing the legacy `MAX_PATH` limits. /// /// This path may or may not have a verbatim prefix. diff --git a/library/std/src/sys/path/windows_prefix.rs b/library/std/src/sys/path/windows_prefix.rs new file mode 100644 index 0000000000000..2545098092311 --- /dev/null +++ b/library/std/src/sys/path/windows_prefix.rs @@ -0,0 +1,210 @@ +//! Parse Windows prefixes, for both Windows and Cygwin. + +use super::{is_sep_byte, is_verbatim_sep}; +use crate::ffi::OsStr; +use crate::path::Prefix; + +struct PrefixParser<'a, const LEN: usize> { + path: &'a OsStr, + prefix: [u8; LEN], +} + +impl<'a, const LEN: usize> PrefixParser<'a, LEN> { + #[inline] + fn get_prefix(path: &OsStr) -> [u8; LEN] { + let mut prefix = [0; LEN]; + // SAFETY: Only ASCII characters are modified. + for (i, &ch) in path.as_encoded_bytes().iter().take(LEN).enumerate() { + prefix[i] = if ch == b'/' { b'\\' } else { ch }; + } + prefix + } + + fn new(path: &'a OsStr) -> Self { + Self { path, prefix: Self::get_prefix(path) } + } + + fn as_slice(&self) -> PrefixParserSlice<'a, '_> { + PrefixParserSlice { + path: self.path, + prefix: &self.prefix[..LEN.min(self.path.len())], + index: 0, + } + } +} + +struct PrefixParserSlice<'a, 'b> { + path: &'a OsStr, + prefix: &'b [u8], + index: usize, +} + +impl<'a> PrefixParserSlice<'a, '_> { + fn strip_prefix(&self, prefix: &str) -> Option { + self.prefix[self.index..] + .starts_with(prefix.as_bytes()) + .then_some(Self { index: self.index + prefix.len(), ..*self }) + } + + fn prefix_bytes(&self) -> &'a [u8] { + &self.path.as_encoded_bytes()[..self.index] + } + + fn finish(self) -> &'a OsStr { + // SAFETY: The unsafety here stems from converting between &OsStr and + // &[u8] and back. This is safe to do because (1) we only look at ASCII + // contents of the encoding and (2) new &OsStr values are produced only + // from ASCII-bounded slices of existing &OsStr values. + unsafe { OsStr::from_encoded_bytes_unchecked(&self.path.as_encoded_bytes()[self.index..]) } + } +} + +/// Returns the parsed prefix and possible pre-prefix length on Cygwin. +/// Usually the pre-prefix length is 0, but `//./` & `//../` are treated specially +/// as equivalent to `//`, and in that case the pre-prefix length is 2 for `./` +/// and 3 for `../`. +pub fn parse_prefix(path: &OsStr) -> Option<(usize, Prefix<'_>)> { + use Prefix::{DeviceNS, Disk, UNC, Verbatim, VerbatimDisk, VerbatimUNC}; + + let parser = PrefixParser::<8>::new(path); + let parser = parser.as_slice(); + if let Some(parser) = parser.strip_prefix(r"\\") { + // \\ + + // Cygwin treats //./ & //../ as // + let (pre_prefix_len, new_parser) = + if cfg!(target_os = "cygwin") && path.as_encoded_bytes().starts_with(b"//.") { + let mut pre_prefix_len = 0; + let mut path = &path.as_encoded_bytes()[2..]; + loop { + // Only search for `/`. + match path.iter().position(|&x| x == b'/') { + Some(separator_start) => { + let component = &path[..separator_start]; + if matches!(component, b"." | b"..") { + let separator_end = separator_start + 1; + path = &path[separator_end..]; + pre_prefix_len += separator_end; + } else { + break; + } + } + None => break, + } + } + let path = unsafe { OsStr::from_encoded_bytes_unchecked(path) }; + (pre_prefix_len, Some(PrefixParser::<6>::new(path))) + } else { + (0, None) + }; + let parser = new_parser.as_ref().map(|parser| parser.as_slice()).unwrap_or(parser); + + // The meaning of verbatim paths can change when they use a different + // separator. + let prefix = if let Some(parser) = parser.strip_prefix(r"?\") + // Cygwin allows `/` in verbatim paths. + && (cfg!(target_os = "cygwin") || !parser.prefix_bytes().iter().any(|&x| x == b'/')) + { + // \\?\ + if let Some(parser) = parser.strip_prefix(r"UNC\") { + // \\?\UNC\server\share + + let path = parser.finish(); + let (server, path) = parse_next_component(path, true); + let (share, _) = parse_next_component(path, true); + + Some(VerbatimUNC(server, share)) + } else { + let path = parser.finish(); + + // in verbatim paths only recognize an exact drive prefix + if let Some(drive) = parse_drive_exact(path) { + // \\?\C: + Some(VerbatimDisk(drive)) + } else { + // \\?\prefix + let (prefix, _) = parse_next_component(path, true); + Some(Verbatim(prefix)) + } + } + } else if let Some(parser) = parser.strip_prefix(r".\") { + // \\.\COM42 + let path = parser.finish(); + let (prefix, _) = parse_next_component(path, false); + Some(DeviceNS(prefix)) + } else { + let path = parser.finish(); + let (server, path) = parse_next_component(path, false); + let (share, _) = parse_next_component(path, false); + + if !server.is_empty() && !share.is_empty() { + // \\server\share + Some(UNC(server, share)) + } else { + // no valid prefix beginning with "\\" recognized + None + } + }; + Some((pre_prefix_len, prefix?)) + } else { + // If it has a drive like `C:` then it's a disk. + // Otherwise there is no prefix. + Some((0, Disk(parse_drive(path)?))) + } +} + +// Parses a drive prefix, e.g. "C:" and "C:\whatever" +fn parse_drive(path: &OsStr) -> Option { + // In most DOS systems, it is not possible to have more than 26 drive letters. + // See . + fn is_valid_drive_letter(drive: &u8) -> bool { + drive.is_ascii_alphabetic() + } + + match path.as_encoded_bytes() { + [drive, b':', ..] if is_valid_drive_letter(drive) => Some(drive.to_ascii_uppercase()), + _ => None, + } +} + +// Parses a drive prefix exactly, e.g. "C:" +fn parse_drive_exact(path: &OsStr) -> Option { + // only parse two bytes: the drive letter and the drive separator + if path.as_encoded_bytes().get(2).map(|&x| is_sep_byte(x)).unwrap_or(true) { + parse_drive(path) + } else { + None + } +} + +// Parse the next path component. +// +// Returns the next component and the rest of the path excluding the component and separator. +// Does not recognize `/` as a separator character on Windows if `verbatim` is true. +fn parse_next_component(path: &OsStr, verbatim: bool) -> (&OsStr, &OsStr) { + let separator = if verbatim { is_verbatim_sep } else { is_sep_byte }; + + match path.as_encoded_bytes().iter().position(|&x| separator(x)) { + Some(separator_start) => { + let separator_end = separator_start + 1; + + let component = &path.as_encoded_bytes()[..separator_start]; + + // Panic safe + // The max `separator_end` is `bytes.len()` and `bytes[bytes.len()..]` is a valid index. + let path = &path.as_encoded_bytes()[separator_end..]; + + // SAFETY: `path` is a valid wtf8 encoded slice and each of the separators ('/', '\') + // is encoded in a single byte, therefore `bytes[separator_start]` and + // `bytes[separator_end]` must be code point boundaries and thus + // `bytes[..separator_start]` and `bytes[separator_end..]` are valid wtf8 slices. + unsafe { + ( + OsStr::from_encoded_bytes_unchecked(component), + OsStr::from_encoded_bytes_unchecked(path), + ) + } + } + None => (path, OsStr::new("")), + } +} diff --git a/library/std/tests/path.rs b/library/std/tests/path.rs index 781855a2d14aa..7f2bb1cb046fe 100644 --- a/library/std/tests/path.rs +++ b/library/std/tests/path.rs @@ -1113,6 +1113,429 @@ pub fn test_decompositions_windows() { ); } +// Unix paths are tested in `test_decompositions_unix` above. +#[test] +#[cfg(target_os = "cygwin")] +pub fn test_decompositions_cygwin() { + t!("\\", + iter: ["/"], + has_root: true, + is_absolute: true, + parent: None, + file_name: None, + file_stem: None, + extension: None, + file_prefix: None + ); + + t!("c:", + iter: ["c:"], + has_root: false, + is_absolute: false, + parent: None, + file_name: None, + file_stem: None, + extension: None, + file_prefix: None + ); + + t!("c:\\", + iter: ["c:", "/"], + has_root: true, + is_absolute: true, + parent: None, + file_name: None, + file_stem: None, + extension: None, + file_prefix: None + ); + + t!("c:/", + iter: ["c:", "/"], + has_root: true, + is_absolute: true, + parent: None, + file_name: None, + file_stem: None, + extension: None, + file_prefix: None + ); + + t!("a\\b\\c", + iter: ["a", "b", "c"], + has_root: false, + is_absolute: false, + parent: Some("a\\b"), + file_name: Some("c"), + file_stem: Some("c"), + extension: None, + file_prefix: Some("c") + ); + + t!("\\a", + iter: ["/", "a"], + has_root: true, + is_absolute: true, + parent: Some("\\"), + file_name: Some("a"), + file_stem: Some("a"), + extension: None, + file_prefix: Some("a") + ); + + t!("c:\\foo.txt", + iter: ["c:", "/", "foo.txt"], + has_root: true, + is_absolute: true, + parent: Some("c:\\"), + file_name: Some("foo.txt"), + file_stem: Some("foo"), + extension: Some("txt"), + file_prefix: Some("foo") + ); + + t!("\\\\server\\share\\foo.txt", + iter: ["\\\\server\\share", "/", "foo.txt"], + has_root: true, + is_absolute: true, + parent: Some("\\\\server\\share\\"), + file_name: Some("foo.txt"), + file_stem: Some("foo"), + extension: Some("txt"), + file_prefix: Some("foo") + ); + + t!("//server/share/foo.txt", + iter: ["//server/share", "/", "foo.txt"], + has_root: true, + is_absolute: true, + parent: Some("//server/share/"), + file_name: Some("foo.txt"), + file_stem: Some("foo"), + extension: Some("txt"), + file_prefix: Some("foo") + ); + + t!("\\\\server\\share", + iter: ["\\\\server\\share", "/"], + has_root: true, + is_absolute: true, + parent: None, + file_name: None, + file_stem: None, + extension: None, + file_prefix: None + ); + + t!("\\\\server", + iter: ["/", "server"], + has_root: true, + is_absolute: true, + parent: Some("\\"), + file_name: Some("server"), + file_stem: Some("server"), + extension: None, + file_prefix: Some("server") + ); + + t!("\\\\?\\bar\\foo.txt", + iter: ["\\\\?\\bar", "/", "foo.txt"], + has_root: true, + is_absolute: true, + parent: Some("\\\\?\\bar\\"), + file_name: Some("foo.txt"), + file_stem: Some("foo"), + extension: Some("txt"), + file_prefix: Some("foo") + ); + + t!("\\\\?\\bar", + iter: ["\\\\?\\bar"], + has_root: true, + is_absolute: true, + parent: None, + file_name: None, + file_stem: None, + extension: None, + file_prefix: None + ); + + t!("\\\\?\\", + iter: ["\\\\?\\"], + has_root: true, + is_absolute: true, + parent: None, + file_name: None, + file_stem: None, + extension: None, + file_prefix: None + ); + + t!("\\\\?\\UNC\\server\\share\\foo.txt", + iter: ["\\\\?\\UNC\\server\\share", "/", "foo.txt"], + has_root: true, + is_absolute: true, + parent: Some("\\\\?\\UNC\\server\\share\\"), + file_name: Some("foo.txt"), + file_stem: Some("foo"), + extension: Some("txt"), + file_prefix: Some("foo") + ); + + t!("\\\\?\\UNC\\server/share\\foo.txt", + iter: ["\\\\?\\UNC\\server/share", "/", "foo.txt"], + has_root: true, + is_absolute: true, + parent: Some("\\\\?\\UNC\\server/share\\"), + file_name: Some("foo.txt"), + file_stem: Some("foo"), + extension: Some("txt"), + file_prefix: Some("foo") + ); + + t!("//?/UNC/server/share/foo.txt", + iter: ["//?/UNC/server/share", "/", "foo.txt"], + has_root: true, + is_absolute: true, + parent: Some("//?/UNC/server/share/"), + file_name: Some("foo.txt"), + file_stem: Some("foo"), + extension: Some("txt"), + file_prefix: Some("foo") + ); + + t!("\\\\?\\UNC\\server", + iter: ["\\\\?\\UNC\\server"], + has_root: true, + is_absolute: true, + parent: None, + file_name: None, + file_stem: None, + extension: None, + file_prefix: None + ); + + t!("\\\\?\\UNC\\", + iter: ["\\\\?\\UNC\\"], + has_root: true, + is_absolute: true, + parent: None, + file_name: None, + file_stem: None, + extension: None, + file_prefix: None + ); + + t!("\\\\?\\C:\\foo.txt", + iter: ["\\\\?\\C:", "/", "foo.txt"], + has_root: true, + is_absolute: true, + parent: Some("\\\\?\\C:\\"), + file_name: Some("foo.txt"), + file_stem: Some("foo"), + extension: Some("txt"), + file_prefix: Some("foo") + ); + + t!("//?/C:/foo.txt", + iter: ["//?/C:", "/", "foo.txt"], + has_root: true, + is_absolute: true, + parent: Some("//?/C:/"), + file_name: Some("foo.txt"), + file_stem: Some("foo"), + extension: Some("txt"), + file_prefix: Some("foo") + ); + + t!("\\\\?\\C:\\", + iter: ["\\\\?\\C:", "/"], + has_root: true, + is_absolute: true, + parent: None, + file_name: None, + file_stem: None, + extension: None, + file_prefix: None + ); + + t!("\\\\?\\C:", + iter: ["\\\\?\\C:"], + has_root: true, + is_absolute: true, + parent: None, + file_name: None, + file_stem: None, + extension: None, + file_prefix: None + ); + + t!("\\\\?\\foo/bar", + iter: ["\\\\?\\foo", "/", "bar"], + has_root: true, + is_absolute: true, + parent: Some("\\\\?\\foo/"), + file_name: Some("bar"), + file_stem: Some("bar"), + extension: None, + file_prefix: Some("bar") + ); + + t!("\\\\?\\C:/foo/bar", + iter: ["\\\\?\\C:", "/", "foo", "bar"], + has_root: true, + is_absolute: true, + parent: Some("\\\\?\\C:/foo"), + file_name: Some("bar"), + file_stem: Some("bar"), + extension: None, + file_prefix: Some("bar") + ); + + t!("\\\\.\\foo\\bar", + iter: ["\\\\.\\foo", "/", "bar"], + has_root: true, + is_absolute: true, + parent: Some("\\\\.\\foo\\"), + file_name: Some("bar"), + file_stem: Some("bar"), + extension: None, + file_prefix: Some("bar") + ); + + t!("\\\\.\\foo", + iter: ["\\\\.\\foo", "/"], + has_root: true, + is_absolute: true, + parent: None, + file_name: None, + file_stem: None, + extension: None, + file_prefix: None + ); + + t!("\\\\.\\foo/bar", + iter: ["\\\\.\\foo", "/", "bar"], + has_root: true, + is_absolute: true, + parent: Some("\\\\.\\foo/"), + file_name: Some("bar"), + file_stem: Some("bar"), + extension: None, + file_prefix: Some("bar") + ); + + t!("\\\\.\\foo\\bar/baz", + iter: ["\\\\.\\foo", "/", "bar", "baz"], + has_root: true, + is_absolute: true, + parent: Some("\\\\.\\foo\\bar"), + file_name: Some("baz"), + file_stem: Some("baz"), + extension: None, + file_prefix: Some("baz") + ); + + t!("\\\\.\\", + iter: ["\\\\.\\", "/"], + has_root: true, + is_absolute: true, + parent: None, + file_name: None, + file_stem: None, + extension: None, + file_prefix: None + ); + + t!("//.\\foo/bar", + iter: ["//.\\foo", "/", "bar"], + has_root: true, + is_absolute: true, + parent: Some("//.\\foo/"), + file_name: Some("bar"), + file_stem: Some("bar"), + extension: None, + file_prefix: Some("bar") + ); + + t!("\\\\./foo/bar", + iter: ["\\\\./foo", "/", "bar"], + has_root: true, + is_absolute: true, + parent: Some("\\\\./foo/"), + file_name: Some("bar"), + file_stem: Some("bar"), + extension: None, + file_prefix: Some("bar") + ); + + t!("//./?/C:/foo/bar", + iter: ["//./?/C:", "/", "foo", "bar"], + has_root: true, + is_absolute: true, + parent: Some("//./?/C:/foo"), + file_name: Some("bar"), + file_stem: Some("bar"), + extension: None, + file_prefix: Some("bar") + ); + + t!("//././../././../?/C:/foo/bar", + iter: ["//././../././../?/C:", "/", "foo", "bar"], + has_root: true, + is_absolute: true, + parent: Some("//././../././../?/C:/foo"), + file_name: Some("bar"), + file_stem: Some("bar"), + extension: None, + file_prefix: Some("bar") + ); + + t!("\\\\?\\a\\b\\", + iter: ["\\\\?\\a", "/", "b"], + has_root: true, + is_absolute: true, + parent: Some("\\\\?\\a\\"), + file_name: Some("b"), + file_stem: Some("b"), + extension: None, + file_prefix: Some("b") + ); + + t!("\\\\?\\C:\\foo.txt.zip", + iter: ["\\\\?\\C:", "/", "foo.txt.zip"], + has_root: true, + is_absolute: true, + parent: Some("\\\\?\\C:\\"), + file_name: Some("foo.txt.zip"), + file_stem: Some("foo.txt"), + extension: Some("zip"), + file_prefix: Some("foo") + ); + + t!("\\\\?\\C:\\.foo.txt.zip", + iter: ["\\\\?\\C:", "/", ".foo.txt.zip"], + has_root: true, + is_absolute: true, + parent: Some("\\\\?\\C:\\"), + file_name: Some(".foo.txt.zip"), + file_stem: Some(".foo.txt"), + extension: Some("zip"), + file_prefix: Some(".foo") + ); + + t!("\\\\?\\C:\\.foo", + iter: ["\\\\?\\C:", "/", ".foo"], + has_root: true, + is_absolute: true, + parent: Some("\\\\?\\C:\\"), + file_name: Some(".foo"), + file_stem: Some(".foo"), + extension: None, + file_prefix: Some(".foo") + ); +} + #[test] pub fn test_stem_ext() { t!("foo", @@ -1228,6 +1651,11 @@ pub fn test_push() { tp!("/foo/bar", "/", "/"); tp!("/foo/bar", "/baz", "/baz"); tp!("/foo/bar", "./baz", "/foo/bar/./baz"); + + if cfg!(target_os = "cygwin") { + tp!("c:\\", "windows", "c:\\windows"); + tp!("c:", "windows", "c:windows"); + } } else { tp!("", "foo", "foo"); tp!("foo", "bar", r"foo\bar");