Skip to content

Handle win32 separator for cygwin paths #141864

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 3 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
64 changes: 64 additions & 0 deletions library/std/src/sys/path/cygwin.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
use crate::ffi::OsString;
use crate::os::unix::ffi::OsStringExt;
use crate::path::{Path, PathBuf};
use crate::sys::common::small_c_string::run_path_with_cstr;
use crate::sys::cvt;
use crate::{io, ptr};

/// Returns `true` when `b` is a path separator byte, i.e. either `/` or `\`.
#[inline]
pub fn is_sep_byte(b: u8) -> bool {
    matches!(b, b'/' | b'\\')
}

/// Returns `true` when `b` separates components after a verbatim prefix.
///
/// In this module both `/` and `\` qualify, exactly as for `is_sep_byte`.
#[inline]
pub fn is_verbatim_sep(b: u8) -> bool {
    [b'/', b'\\'].contains(&b)
}

pub use super::windows_prefix::parse_prefix;

/// The main path separator for this platform module, as a string slice.
///
/// `is_sep_byte` in this module additionally accepts `\` when parsing.
pub const MAIN_SEP_STR: &str = "/";
/// The main path separator for this platform module, as a `char`.
pub const MAIN_SEP: char = '/';

unsafe extern "C" {
// Binding for Cygwin's path conversion routine.
//
// Doc: https://cygwin.com/cygwin-api/func-cygwin-conv-path.html
// Src: https://github.com/cygwin/cygwin/blob/718a15ba50e0d01c79800bd658c2477f9a603540/winsup/cygwin/path.cc#L3902
//
// Converts a path to an absolute POSIX path, regardless of whether the input is a Win32 path
// or a POSIX path.
//
// Safety (requirements on the caller):
// * `what` should be `CCP_WIN_A_TO_POSIX` for the use in this module.
// * `from` must point to a NUL-terminated UTF-8 path.
// * `to` must point to a writable buffer whose size in bytes is `size`. This module also
//   calls it with a null `to` and a `size` of 0 to ask for the required buffer size first.
fn cygwin_conv_path(
what: libc::c_uint,
from: *const libc::c_char,
to: *mut u8,
size: libc::size_t,
) -> libc::ssize_t;
}

// Conversion mode passed as the `what` argument of `cygwin_conv_path`.
// NOTE(review): the value presumably mirrors `CCP_WIN_A_TO_POSIX` from Cygwin's
// `<sys/cygwin.h>` — confirm against the header when updating.
const CCP_WIN_A_TO_POSIX: libc::c_uint = 2;

/// Make a POSIX path absolute.
///
/// The path is handed to `cygwin_conv_path` twice: first with a null buffer to learn how many
/// bytes the converted path needs (including its trailing NUL), then with a buffer of that
/// size to receive the result.
///
/// # Errors
///
/// Returns whatever error `run_path_with_cstr` produces for the input path, or the error
/// `cvt` derives from a failing `cygwin_conv_path` call.
pub(crate) fn absolute(path: &Path) -> io::Result<PathBuf> {
    run_path_with_cstr(path, &|path| {
        // SAFETY: `path` is a valid NUL-terminated C string. Passing a null `to` together
        // with a `size` of 0 only queries the required buffer size; nothing is written.
        let size = cvt(unsafe {
            cygwin_conv_path(CCP_WIN_A_TO_POSIX, path.as_ptr(), ptr::null_mut(), 0)
        })?;
        // If success, size should not be 0.
        debug_assert!(size >= 1);
        let size = size as usize;

        // Zero-fill the buffer instead of only reserving capacity: every byte is then
        // initialized, so the result length can be taken from the NUL terminator that was
        // actually written rather than trusted from the earlier size query.
        let mut buffer = vec![0u8; size];
        // SAFETY: `path` is a valid NUL-terminated C string and `buffer` is valid for writes
        // of exactly `size` bytes.
        cvt(unsafe {
            cygwin_conv_path(CCP_WIN_A_TO_POSIX, path.as_ptr(), buffer.as_mut_ptr(), size)
        })?;

        // Drop the terminator and anything after it. This stays correct if the second call
        // produced a shorter string than the first call announced, and it cannot underflow
        // even if `size` were unexpectedly 0.
        if let Some(nul) = buffer.iter().position(|&byte| byte == 0) {
            buffer.truncate(nul);
        }
        Ok(PathBuf::from(OsString::from_vec(buffer)))
    })
}

/// Reports whether `path` is treated as absolute, which here means it has a root component.
pub(crate) fn is_absolute(path: &Path) -> bool {
    Path::has_root(path)
}
5 changes: 5 additions & 0 deletions library/std/src/sys/path/mod.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
cfg_if::cfg_if! {
if #[cfg(target_os = "windows")] {
mod windows;
mod windows_prefix;
pub use windows::*;
} else if #[cfg(all(target_vendor = "fortanix", target_env = "sgx"))] {
mod sgx;
Expand All @@ -11,6 +12,10 @@ cfg_if::cfg_if! {
} else if #[cfg(target_os = "uefi")] {
mod uefi;
pub use uefi::*;
} else if #[cfg(target_os = "cygwin")] {
mod cygwin;
mod windows_prefix;
pub use cygwin::*;
} else {
mod unix;
pub use unix::*;
Expand Down
175 changes: 3 additions & 172 deletions library/std/src/sys/path/windows.rs
Original file line number Diff line number Diff line change
@@ -1,12 +1,14 @@
use crate::ffi::{OsStr, OsString};
use crate::path::{Path, PathBuf, Prefix};
use crate::path::{Path, PathBuf};
use crate::sys::api::utf16;
use crate::sys::pal::{c, fill_utf16_buf, os2path, to_u16s};
use crate::{io, ptr};

#[cfg(test)]
mod tests;

pub use super::windows_prefix::parse_prefix;

/// The main path separator for this platform module, as a string slice.
pub const MAIN_SEP_STR: &str = "\\";
/// The main path separator for this platform module, as a `char`.
pub const MAIN_SEP: char = '\\';

Expand Down Expand Up @@ -77,177 +79,6 @@ pub(crate) fn append_suffix(path: PathBuf, suffix: &OsStr) -> PathBuf {
path.into()
}

/// A path paired with a copy of its first `LEN` bytes in which every `/` has been replaced by
/// `\`, so that prefix matching only has to handle a single separator spelling.
struct PrefixParser<'a, const LEN: usize> {
    /// The original, unmodified path.
    path: &'a OsStr,
    /// Up to `LEN` leading bytes of `path` with `/` rewritten to `\`; unused slots stay 0.
    prefix: [u8; LEN],
}

impl<'a, const LEN: usize> PrefixParser<'a, LEN> {
    /// Builds the separator-normalized copy of the first `LEN` bytes of `path`.
    #[inline]
    fn get_prefix(path: &OsStr) -> [u8; LEN] {
        let mut normalized = [0; LEN];
        // `zip` stops at the shorter side, so at most `LEN` bytes are copied and a short path
        // leaves the remaining slots zeroed. Only the ASCII byte `/` is ever rewritten.
        for (slot, &byte) in normalized.iter_mut().zip(path.as_encoded_bytes()) {
            *slot = match byte {
                b'/' => b'\\',
                other => other,
            };
        }
        normalized
    }

    /// Creates a parser for `path`.
    fn new(path: &'a OsStr) -> Self {
        let prefix = Self::get_prefix(path);
        Self { path, prefix }
    }

    /// Returns a cursor positioned at the start of the normalized prefix, limited to the
    /// bytes that were actually copied from the path.
    fn as_slice(&self) -> PrefixParserSlice<'a, '_> {
        let copied = LEN.min(self.path.len());
        PrefixParserSlice { path: self.path, prefix: &self.prefix[..copied], index: 0 }
    }
}

/// A cursor over a separator-normalized prefix that remembers how many bytes of the original
/// path have been consumed so far.
struct PrefixParserSlice<'a, 'b> {
    /// The original, unmodified path.
    path: &'a OsStr,
    /// The normalized leading bytes being matched against.
    prefix: &'b [u8],
    /// Number of leading bytes already consumed.
    index: usize,
}

impl<'a> PrefixParserSlice<'a, '_> {
    /// Returns a cursor advanced past `prefix` if the unconsumed normalized bytes start with
    /// it, and `None` otherwise.
    fn strip_prefix(&self, prefix: &str) -> Option<Self> {
        let unconsumed = &self.prefix[self.index..];
        if unconsumed.starts_with(prefix.as_bytes()) {
            Some(Self { index: self.index + prefix.len(), ..*self })
        } else {
            None
        }
    }

    /// Returns the bytes of the original path that have been consumed so far.
    fn prefix_bytes(&self) -> &'a [u8] {
        let consumed = self.index;
        &self.path.as_encoded_bytes()[..consumed]
    }

    /// Returns the part of the original path that has not been consumed.
    fn finish(self) -> &'a OsStr {
        let rest = &self.path.as_encoded_bytes()[self.index..];
        // SAFETY: The unsafety here stems from converting between &OsStr and
        // &[u8] and back. This is safe to do because (1) we only look at ASCII
        // contents of the encoding and (2) new &OsStr values are produced only
        // from ASCII-bounded slices of existing &OsStr values.
        unsafe { OsStr::from_encoded_bytes_unchecked(rest) }
    }
}

/// Parses the Windows-style prefix at the start of `path`, if there is one.
///
/// Recognized forms are verbatim prefixes (`\\?\…`), device namespaces (`\\.\…`), UNC shares
/// (`\\server\share`) and drive letters (`C:`). Returns `None` when none of them matches.
pub fn parse_prefix(path: &OsStr) -> Option<Prefix<'_>> {
    use Prefix::{DeviceNS, Disk, UNC, Verbatim, VerbatimDisk, VerbatimUNC};

    let normalized = PrefixParser::<8>::new(path);

    // Without a leading `\\` the only possible prefix is a drive like `C:`.
    let Some(after_slashes) = normalized.as_slice().strip_prefix(r"\\") else {
        return parse_drive(path).map(Disk);
    };

    // \\?\
    // The meaning of verbatim paths can change when they use a different separator, so the
    // marker only counts if the original bytes consumed so far contain no `/`.
    let verbatim = after_slashes
        .strip_prefix(r"?\")
        .filter(|candidate| !candidate.prefix_bytes().contains(&b'/'));
    if let Some(verbatim) = verbatim {
        if let Some(unc) = verbatim.strip_prefix(r"UNC\") {
            // \\?\UNC\server\share
            let (server, rest) = parse_next_component(unc.finish(), true);
            let (share, _) = parse_next_component(rest, true);
            return Some(VerbatimUNC(server, share));
        }

        let rest = verbatim.finish();
        // In verbatim paths only an exact drive prefix is recognized.
        return Some(match parse_drive_exact(rest) {
            // \\?\C:
            Some(drive) => VerbatimDisk(drive),
            // \\?\prefix
            None => Verbatim(parse_next_component(rest, true).0),
        });
    }

    // \\.\COM42
    if let Some(device) = after_slashes.strip_prefix(r".\") {
        let (name, _) = parse_next_component(device.finish(), false);
        return Some(DeviceNS(name));
    }

    // \\server\share — both parts must be non-empty, otherwise no valid prefix beginning
    // with `\\` was recognized.
    let (server, rest) = parse_next_component(after_slashes.finish(), false);
    let (share, _) = parse_next_component(rest, false);
    (!server.is_empty() && !share.is_empty()).then(|| UNC(server, share))
}

// Parses a drive prefix such as "C:" or "C:\whatever" and returns the upper-cased drive letter.
//
// Only ASCII letters are accepted: in most DOS systems, it is not possible to have more than
// 26 drive letters.
// See <https://en.wikipedia.org/wiki/Drive_letter_assignment#Common_assignments>.
fn parse_drive(path: &OsStr) -> Option<u8> {
    let bytes = path.as_encoded_bytes();
    let letter = *bytes.first()?;
    let colon = *bytes.get(1)?;
    (colon == b':' && letter.is_ascii_alphabetic()).then(|| letter.to_ascii_uppercase())
}

// Parses a drive prefix exactly, e.g. "C:".
//
// The drive is only accepted when the path ends right after the two drive bytes or the third
// byte is a separator; anything else (such as "C:foo") yields `None`.
fn parse_drive_exact(path: &OsStr) -> Option<u8> {
    match path.as_encoded_bytes().get(2) {
        Some(&third) if !is_sep_byte(third) => None,
        _ => parse_drive(path),
    }
}

// Splits `path` at its first separator.
//
// Returns the leading component and the remainder of the path, with the separator itself in
// neither part. If there is no separator, the whole path is the component and the remainder
// is empty. When `verbatim` is true the separator test is `is_verbatim_sep`, otherwise it is
// `is_sep_byte`; which bytes each accepts is decided by the platform module.
fn parse_next_component(path: &OsStr, verbatim: bool) -> (&OsStr, &OsStr) {
    let is_separator: fn(u8) -> bool = if verbatim { is_verbatim_sep } else { is_sep_byte };
    let bytes = path.as_encoded_bytes();

    let Some(separator_at) = bytes.iter().position(|&byte| is_separator(byte)) else {
        return (path, OsStr::new(""));
    };

    // Panic safe: `separator_at + 1` is at most `bytes.len()`, and slicing from
    // `bytes.len()` yields an empty slice.
    let (component, rest) = (&bytes[..separator_at], &bytes[separator_at + 1..]);

    // SAFETY: `path` is a valid wtf8 encoded slice and each of the separators ('/', '\')
    // is encoded in a single byte, therefore the positions immediately before and after the
    // separator are code point boundaries and both sub-slices are valid wtf8 slices.
    unsafe {
        (
            OsStr::from_encoded_bytes_unchecked(component),
            OsStr::from_encoded_bytes_unchecked(rest),
        )
    }
}

/// Returns a UTF-16 encoded path capable of bypassing the legacy `MAX_PATH` limits.
///
/// This path may or may not have a verbatim prefix.
Expand Down
Loading
Loading