|
| 1 | +use super::{is_sep_byte, is_verbatim_sep}; |
| 2 | +use crate::ffi::OsStr; |
| 3 | +use crate::path::Prefix; |
| 4 | + |
| 5 | +struct PrefixParser<'a, const LEN: usize> { |
| 6 | + path: &'a OsStr, |
| 7 | + prefix: [u8; LEN], |
| 8 | +} |
| 9 | + |
| 10 | +impl<'a, const LEN: usize> PrefixParser<'a, LEN> { |
| 11 | + #[inline] |
| 12 | + fn get_prefix(path: &OsStr) -> [u8; LEN] { |
| 13 | + let mut prefix = [0; LEN]; |
| 14 | + // SAFETY: Only ASCII characters are modified. |
| 15 | + for (i, &ch) in path.as_encoded_bytes().iter().take(LEN).enumerate() { |
| 16 | + prefix[i] = if ch == b'/' { b'\\' } else { ch }; |
| 17 | + } |
| 18 | + prefix |
| 19 | + } |
| 20 | + |
| 21 | + fn new(path: &'a OsStr) -> Self { |
| 22 | + Self { path, prefix: Self::get_prefix(path) } |
| 23 | + } |
| 24 | + |
| 25 | + fn as_slice(&self) -> PrefixParserSlice<'a, '_> { |
| 26 | + PrefixParserSlice { |
| 27 | + path: self.path, |
| 28 | + prefix: &self.prefix[..LEN.min(self.path.len())], |
| 29 | + index: 0, |
| 30 | + } |
| 31 | + } |
| 32 | +} |
| 33 | + |
/// A cursor that matches literals against `prefix` while tracking how many
/// bytes of `path` have been consumed so far.
struct PrefixParserSlice<'a, 'b> {
    /// The path that `prefix_bytes` and `finish` slice into.
    path: &'a OsStr,
    /// The bytes that `strip_prefix` compares literals against.
    prefix: &'b [u8],
    /// Number of leading bytes already consumed.
    index: usize,
}

impl<'a> PrefixParserSlice<'a, '_> {
    /// Returns a cursor advanced past `prefix` when the unconsumed part of
    /// the buffer starts with it, and `None` otherwise. `self` is unchanged
    /// either way.
    fn strip_prefix(&self, prefix: &str) -> Option<Self> {
        let remaining = &self.prefix[self.index..];
        if remaining.starts_with(prefix.as_bytes()) {
            Some(Self {
                path: self.path,
                prefix: self.prefix,
                index: self.index + prefix.len(),
            })
        } else {
            None
        }
    }

    /// Returns the bytes of the original path consumed so far, i.e. exactly
    /// as they were written rather than in their normalized form.
    fn prefix_bytes(&self) -> &'a [u8] {
        let (consumed, _) = self.path.as_encoded_bytes().split_at(self.index);
        consumed
    }

    /// Consumes the cursor and returns the part of the original path that
    /// has not been consumed yet.
    fn finish(self) -> &'a OsStr {
        let tail = &self.path.as_encoded_bytes()[self.index..];
        // SAFETY: The unsafety comes from going &OsStr -> &[u8] -> &OsStr.
        // This is sound because (1) only ASCII contents of the encoding are
        // inspected and (2) the new &OsStr is an ASCII-bounded slice of an
        // existing &OsStr.
        unsafe { OsStr::from_encoded_bytes_unchecked(tail) }
    }
}
| 59 | + |
| 60 | +pub fn parse_prefix(path: &OsStr) -> Option<Prefix<'_>> { |
| 61 | + use Prefix::{DeviceNS, Disk, UNC, Verbatim, VerbatimDisk, VerbatimUNC}; |
| 62 | + |
| 63 | + let parser = PrefixParser::<8>::new(path); |
| 64 | + let parser = parser.as_slice(); |
| 65 | + if let Some(parser) = parser.strip_prefix(r"\\") { |
| 66 | + // \\ |
| 67 | + |
| 68 | + // The meaning of verbatim paths can change when they use a different |
| 69 | + // separator. |
| 70 | + if let Some(parser) = parser.strip_prefix(r"?\") |
| 71 | + // Cygwin allows `/` in verbatim paths. |
| 72 | + && (cfg!(target_os = "cygwin") || !parser.prefix_bytes().iter().any(|&x| x == b'/')) |
| 73 | + { |
| 74 | + // \\?\ |
| 75 | + if let Some(parser) = parser.strip_prefix(r"UNC\") { |
| 76 | + // \\?\UNC\server\share |
| 77 | + |
| 78 | + let path = parser.finish(); |
| 79 | + let (server, path) = parse_next_component(path, true); |
| 80 | + let (share, _) = parse_next_component(path, true); |
| 81 | + |
| 82 | + Some(VerbatimUNC(server, share)) |
| 83 | + } else { |
| 84 | + let path = parser.finish(); |
| 85 | + |
| 86 | + // in verbatim paths only recognize an exact drive prefix |
| 87 | + if let Some(drive) = parse_drive_exact(path) { |
| 88 | + // \\?\C: |
| 89 | + Some(VerbatimDisk(drive)) |
| 90 | + } else { |
| 91 | + // \\?\prefix |
| 92 | + let (prefix, _) = parse_next_component(path, true); |
| 93 | + Some(Verbatim(prefix)) |
| 94 | + } |
| 95 | + } |
| 96 | + } else if let Some(parser) = parser.strip_prefix(r".\") { |
| 97 | + // \\.\COM42 |
| 98 | + let path = parser.finish(); |
| 99 | + let (prefix, _) = parse_next_component(path, false); |
| 100 | + Some(DeviceNS(prefix)) |
| 101 | + } else { |
| 102 | + let path = parser.finish(); |
| 103 | + let (server, path) = parse_next_component(path, false); |
| 104 | + let (share, _) = parse_next_component(path, false); |
| 105 | + |
| 106 | + if !server.is_empty() && !share.is_empty() { |
| 107 | + // \\server\share |
| 108 | + Some(UNC(server, share)) |
| 109 | + } else { |
| 110 | + // no valid prefix beginning with "\\" recognized |
| 111 | + None |
| 112 | + } |
| 113 | + } |
| 114 | + } else { |
| 115 | + // If it has a drive like `C:` then it's a disk. |
| 116 | + // Otherwise there is no prefix. |
| 117 | + parse_drive(path).map(Disk) |
| 118 | + } |
| 119 | +} |
| 120 | + |
// Parses a drive prefix, e.g. "C:" and "C:\whatever".
//
// Returns the drive letter in upper case when the path starts with an ASCII
// letter immediately followed by `:`, and `None` otherwise.
fn parse_drive(path: &OsStr) -> Option<u8> {
    let bytes = path.as_encoded_bytes();
    let (&letter, &colon) = (bytes.first()?, bytes.get(1)?);

    // In most DOS systems, it is not possible to have more than 26 drive letters.
    // See <https://en.wikipedia.org/wiki/Drive_letter_assignment#Common_assignments>.
    let is_drive = letter.is_ascii_alphabetic() && colon == b':';
    is_drive.then(|| letter.to_ascii_uppercase())
}
| 134 | + |
| 135 | +// Parses a drive prefix exactly, e.g. "C:" |
| 136 | +fn parse_drive_exact(path: &OsStr) -> Option<u8> { |
| 137 | + // only parse two bytes: the drive letter and the drive separator |
| 138 | + if path.as_encoded_bytes().get(2).map(|&x| is_sep_byte(x)).unwrap_or(true) { |
| 139 | + parse_drive(path) |
| 140 | + } else { |
| 141 | + None |
| 142 | + } |
| 143 | +} |
| 144 | + |
| 145 | +// Parse the next path component. |
| 146 | +// |
| 147 | +// Returns the next component and the rest of the path excluding the component and separator. |
| 148 | +// Does not recognize `/` as a separator character on Windows if `verbatim` is true. |
| 149 | +fn parse_next_component(path: &OsStr, verbatim: bool) -> (&OsStr, &OsStr) { |
| 150 | + let separator = if verbatim { is_verbatim_sep } else { is_sep_byte }; |
| 151 | + |
| 152 | + match path.as_encoded_bytes().iter().position(|&x| separator(x)) { |
| 153 | + Some(separator_start) => { |
| 154 | + let separator_end = separator_start + 1; |
| 155 | + |
| 156 | + let component = &path.as_encoded_bytes()[..separator_start]; |
| 157 | + |
| 158 | + // Panic safe |
| 159 | + // The max `separator_end` is `bytes.len()` and `bytes[bytes.len()..]` is a valid index. |
| 160 | + let path = &path.as_encoded_bytes()[separator_end..]; |
| 161 | + |
| 162 | + // SAFETY: `path` is a valid wtf8 encoded slice and each of the separators ('/', '\') |
| 163 | + // is encoded in a single byte, therefore `bytes[separator_start]` and |
| 164 | + // `bytes[separator_end]` must be code point boundaries and thus |
| 165 | + // `bytes[..separator_start]` and `bytes[separator_end..]` are valid wtf8 slices. |
| 166 | + unsafe { |
| 167 | + ( |
| 168 | + OsStr::from_encoded_bytes_unchecked(component), |
| 169 | + OsStr::from_encoded_bytes_unchecked(path), |
| 170 | + ) |
| 171 | + } |
| 172 | + } |
| 173 | + None => (path, OsStr::new("")), |
| 174 | + } |
| 175 | +} |
0 commit comments