Skip to content

Commit 8d2caed

Browse files
committed
Share prefix parser with cygwin
1 parent 9284a3b commit 8d2caed

File tree

4 files changed

+183
-178
lines changed

4 files changed

+183
-178
lines changed

library/std/src/sys/path/cygwin.rs

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
1-
use crate::ffi::{OsStr, OsString};
1+
use crate::ffi::OsString;
22
use crate::os::unix::ffi::OsStringExt;
3-
use crate::path::{Path, PathBuf, Prefix};
3+
use crate::path::{Path, PathBuf};
44
use crate::sys::common::small_c_string::run_path_with_cstr;
55
use crate::sys::cvt;
66
use crate::{io, ptr};
@@ -15,10 +15,7 @@ pub fn is_verbatim_sep(b: u8) -> bool {
1515
b == b'/' || b == b'\\'
1616
}
1717

18-
#[inline]
19-
pub fn parse_prefix(_: &OsStr) -> Option<Prefix<'_>> {
20-
None
21-
}
18+
pub use super::prefix::parse_prefix;
2219

2320
pub const MAIN_SEP_STR: &str = "/";
2421
pub const MAIN_SEP: char = '/';

library/std/src/sys/path/mod.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
cfg_if::cfg_if! {
22
if #[cfg(target_os = "windows")] {
3+
mod prefix;
34
mod windows;
45
pub use windows::*;
56
} else if #[cfg(all(target_vendor = "fortanix", target_env = "sgx"))] {
@@ -13,6 +14,7 @@ cfg_if::cfg_if! {
1314
pub use uefi::*;
1415
} else if #[cfg(target_os = "cygwin")] {
1516
mod cygwin;
17+
mod prefix;
1618
pub use cygwin::*;
1719
} else {
1820
mod unix;

library/std/src/sys/path/prefix.rs

Lines changed: 175 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,175 @@
1+
use super::{is_sep_byte, is_verbatim_sep};
2+
use crate::ffi::OsStr;
3+
use crate::path::Prefix;
4+
5+
struct PrefixParser<'a, const LEN: usize> {
6+
path: &'a OsStr,
7+
prefix: [u8; LEN],
8+
}
9+
10+
impl<'a, const LEN: usize> PrefixParser<'a, LEN> {
11+
#[inline]
12+
fn get_prefix(path: &OsStr) -> [u8; LEN] {
13+
let mut prefix = [0; LEN];
14+
// SAFETY: Only ASCII characters are modified.
15+
for (i, &ch) in path.as_encoded_bytes().iter().take(LEN).enumerate() {
16+
prefix[i] = if ch == b'/' { b'\\' } else { ch };
17+
}
18+
prefix
19+
}
20+
21+
fn new(path: &'a OsStr) -> Self {
22+
Self { path, prefix: Self::get_prefix(path) }
23+
}
24+
25+
fn as_slice(&self) -> PrefixParserSlice<'a, '_> {
26+
PrefixParserSlice {
27+
path: self.path,
28+
prefix: &self.prefix[..LEN.min(self.path.len())],
29+
index: 0,
30+
}
31+
}
32+
}
33+
34+
/// A cursor over a path and the normalised copy of its leading bytes.
///
/// `index` counts how many bytes have been consumed so far; it is an offset
/// into both `prefix` and the encoded bytes of `path`.
struct PrefixParserSlice<'a, 'b> {
    path: &'a OsStr,
    prefix: &'b [u8],
    index: usize,
}

impl<'a> PrefixParserSlice<'a, '_> {
    /// Returns a cursor advanced past `prefix` if the unconsumed part of the
    /// normalised buffer starts with it, and `None` otherwise.
    fn strip_prefix(&self, prefix: &str) -> Option<Self> {
        let remaining = &self.prefix[self.index..];
        if remaining.starts_with(prefix.as_bytes()) {
            Some(Self { path: self.path, prefix: self.prefix, index: self.index + prefix.len() })
        } else {
            None
        }
    }

    /// Returns the bytes consumed so far, taken from the original path
    /// rather than from the normalised copy.
    fn prefix_bytes(&self) -> &'a [u8] {
        let encoded = self.path.as_encoded_bytes();
        &encoded[..self.index]
    }

    /// Consumes the cursor and returns the part of the path after it.
    fn finish(self) -> &'a OsStr {
        let unconsumed = &self.path.as_encoded_bytes()[self.index..];
        // SAFETY: The unsafety here stems from converting between &OsStr and
        // &[u8] and back. This is safe to do because (1) we only look at ASCII
        // contents of the encoding and (2) new &OsStr values are produced only
        // from ASCII-bounded slices of existing &OsStr values.
        unsafe { OsStr::from_encoded_bytes_unchecked(unconsumed) }
    }
}
59+
60+
pub fn parse_prefix(path: &OsStr) -> Option<Prefix<'_>> {
61+
use Prefix::{DeviceNS, Disk, UNC, Verbatim, VerbatimDisk, VerbatimUNC};
62+
63+
let parser = PrefixParser::<8>::new(path);
64+
let parser = parser.as_slice();
65+
if let Some(parser) = parser.strip_prefix(r"\\") {
66+
// \\
67+
68+
// The meaning of verbatim paths can change when they use a different
69+
// separator.
70+
if let Some(parser) = parser.strip_prefix(r"?\")
71+
// Cygwin allows `/` in verbatim paths.
72+
&& (cfg!(target_os = "cygwin") || !parser.prefix_bytes().iter().any(|&x| x == b'/'))
73+
{
74+
// \\?\
75+
if let Some(parser) = parser.strip_prefix(r"UNC\") {
76+
// \\?\UNC\server\share
77+
78+
let path = parser.finish();
79+
let (server, path) = parse_next_component(path, true);
80+
let (share, _) = parse_next_component(path, true);
81+
82+
Some(VerbatimUNC(server, share))
83+
} else {
84+
let path = parser.finish();
85+
86+
// in verbatim paths only recognize an exact drive prefix
87+
if let Some(drive) = parse_drive_exact(path) {
88+
// \\?\C:
89+
Some(VerbatimDisk(drive))
90+
} else {
91+
// \\?\prefix
92+
let (prefix, _) = parse_next_component(path, true);
93+
Some(Verbatim(prefix))
94+
}
95+
}
96+
} else if let Some(parser) = parser.strip_prefix(r".\") {
97+
// \\.\COM42
98+
let path = parser.finish();
99+
let (prefix, _) = parse_next_component(path, false);
100+
Some(DeviceNS(prefix))
101+
} else {
102+
let path = parser.finish();
103+
let (server, path) = parse_next_component(path, false);
104+
let (share, _) = parse_next_component(path, false);
105+
106+
if !server.is_empty() && !share.is_empty() {
107+
// \\server\share
108+
Some(UNC(server, share))
109+
} else {
110+
// no valid prefix beginning with "\\" recognized
111+
None
112+
}
113+
}
114+
} else {
115+
// If it has a drive like `C:` then it's a disk.
116+
// Otherwise there is no prefix.
117+
parse_drive(path).map(Disk)
118+
}
119+
}
120+
121+
// Parses a drive prefix, e.g. "C:" and "C:\whatever"
//
// Returns the drive letter folded to ASCII uppercase, or `None` when the path
// does not start with an ASCII letter immediately followed by `:`.
fn parse_drive(path: &OsStr) -> Option<u8> {
    let bytes = path.as_encoded_bytes();
    let (&letter, &colon) = (bytes.first()?, bytes.get(1)?);

    // In most DOS systems, it is not possible to have more than 26 drive letters.
    // See <https://en.wikipedia.org/wiki/Drive_letter_assignment#Common_assignments>.
    let is_drive = colon == b':' && letter.is_ascii_alphabetic();
    is_drive.then(|| letter.to_ascii_uppercase())
}
134+
135+
// Parses a drive prefix exactly, e.g. "C:"
136+
fn parse_drive_exact(path: &OsStr) -> Option<u8> {
137+
// only parse two bytes: the drive letter and the drive separator
138+
if path.as_encoded_bytes().get(2).map(|&x| is_sep_byte(x)).unwrap_or(true) {
139+
parse_drive(path)
140+
} else {
141+
None
142+
}
143+
}
144+
145+
// Parse the next path component.
146+
//
147+
// Returns the next component and the rest of the path excluding the component and separator.
148+
// Does not recognize `/` as a separator character on Windows if `verbatim` is true.
149+
fn parse_next_component(path: &OsStr, verbatim: bool) -> (&OsStr, &OsStr) {
150+
let separator = if verbatim { is_verbatim_sep } else { is_sep_byte };
151+
152+
match path.as_encoded_bytes().iter().position(|&x| separator(x)) {
153+
Some(separator_start) => {
154+
let separator_end = separator_start + 1;
155+
156+
let component = &path.as_encoded_bytes()[..separator_start];
157+
158+
// Panic safe
159+
// The max `separator_end` is `bytes.len()` and `bytes[bytes.len()..]` is a valid index.
160+
let path = &path.as_encoded_bytes()[separator_end..];
161+
162+
// SAFETY: `path` is a valid wtf8 encoded slice and each of the separators ('/', '\')
163+
// is encoded in a single byte, therefore `bytes[separator_start]` and
164+
// `bytes[separator_end]` must be code point boundaries and thus
165+
// `bytes[..separator_start]` and `bytes[separator_end..]` are valid wtf8 slices.
166+
unsafe {
167+
(
168+
OsStr::from_encoded_bytes_unchecked(component),
169+
OsStr::from_encoded_bytes_unchecked(path),
170+
)
171+
}
172+
}
173+
None => (path, OsStr::new("")),
174+
}
175+
}

0 commit comments

Comments
 (0)