Skip to content

Commit c8e9497

Browse files
committed
Auto merge of #90596 - the8472:path-hash-opt, r=Mark-Simulacrum
Optimize Eq and Hash for Path/PathBuf ``` # new test path::tests::bench_hash_path_long ... bench: 86 ns/iter (+/- 1) test path::tests::bench_hash_path_short ... bench: 13 ns/iter (+/- 1) test path::tests::bench_path_hashset ... bench: 197 ns/iter (+/- 6) test path::tests::bench_path_hashset_miss ... bench: 94 ns/iter (+/- 4) # old test path::tests::bench_hash_path_long ... bench: 192 ns/iter (+/- 2) test path::tests::bench_hash_path_short ... bench: 33 ns/iter (+/- 1) test path::tests::bench_path_hashset ... bench: 1,121 ns/iter (+/- 24) test path::tests::bench_path_hashset_miss ... bench: 273 ns/iter (+/- 6) ```
2 parents 3b2c454 + c1ea7bd commit c8e9497

File tree

3 files changed

+140
-11
lines changed

3 files changed

+140
-11
lines changed

library/std/src/path.rs

+61-9
Original file line numberDiff line numberDiff line change
@@ -979,6 +979,25 @@ impl FusedIterator for Components<'_> {}
979979
impl<'a> cmp::PartialEq for Components<'a> {
980980
#[inline]
981981
fn eq(&self, other: &Components<'a>) -> bool {
982+
let Components { path: _, front: _, back: _, has_physical_root: _, prefix: _ } = self;
983+
984+
// Fast path for exact matches, e.g. for hashmap lookups.
985+
// Don't explicitly compare the prefix or has_physical_root fields since they'll
986+
// either be covered by the `path` buffer or are only relevant for `prefix_verbatim()`.
987+
if self.path.len() == other.path.len()
988+
&& self.front == other.front
989+
&& self.back == State::Body
990+
&& other.back == State::Body
991+
&& self.prefix_verbatim() == other.prefix_verbatim()
992+
{
993+
// possible future improvement: this could bail out earlier if there were a
994+
// reverse memcmp/bcmp comparing back to front
995+
if self.path == other.path {
996+
return true;
997+
}
998+
}
999+
1000+
// compare back to front since absolute paths often share long prefixes
9821001
Iterator::eq(self.clone().rev(), other.clone().rev())
9831002
}
9841003
}
@@ -1013,13 +1032,12 @@ fn compare_components(mut left: Components<'_>, mut right: Components<'_>) -> cm
10131032
// The fast path isn't taken for paths with a PrefixComponent to avoid backtracking into
10141033
// the middle of one
10151034
if left.prefix.is_none() && right.prefix.is_none() && left.front == right.front {
1016-
// this might benefit from a [u8]::first_mismatch simd implementation, if it existed
1017-
let first_difference =
1018-
match left.path.iter().zip(right.path.iter()).position(|(&a, &b)| a != b) {
1019-
None if left.path.len() == right.path.len() => return cmp::Ordering::Equal,
1020-
None => left.path.len().min(right.path.len()),
1021-
Some(diff) => diff,
1022-
};
1035+
// possible future improvement: a [u8]::first_mismatch simd implementation
1036+
let first_difference = match left.path.iter().zip(right.path).position(|(&a, &b)| a != b) {
1037+
None if left.path.len() == right.path.len() => return cmp::Ordering::Equal,
1038+
None => left.path.len().min(right.path.len()),
1039+
Some(diff) => diff,
1040+
};
10231041

10241042
if let Some(previous_sep) =
10251043
left.path[..first_difference].iter().rposition(|&b| left.is_sep_byte(b))
@@ -2873,9 +2891,43 @@ impl cmp::PartialEq for Path {
28732891
#[stable(feature = "rust1", since = "1.0.0")]
28742892
impl Hash for Path {
28752893
fn hash<H: Hasher>(&self, h: &mut H) {
2876-
for component in self.components() {
2877-
component.hash(h);
2894+
let bytes = self.as_u8_slice();
2895+
let prefix_len = match parse_prefix(&self.inner) {
2896+
Some(prefix) => {
2897+
prefix.hash(h);
2898+
prefix.len()
2899+
}
2900+
None => 0,
2901+
};
2902+
let bytes = &bytes[prefix_len..];
2903+
2904+
let mut component_start = 0;
2905+
let mut bytes_hashed = 0;
2906+
2907+
for i in 0..bytes.len() {
2908+
if is_sep_byte(bytes[i]) {
2909+
if i > component_start {
2910+
let to_hash = &bytes[component_start..i];
2911+
h.write(to_hash);
2912+
bytes_hashed += to_hash.len();
2913+
}
2914+
2915+
// skip over separator and optionally a following CurDir item
2916+
// since components() would normalize these away
2917+
component_start = i + match bytes[i..] {
2918+
[_, b'.', b'/', ..] | [_, b'.'] => 2,
2919+
_ => 1,
2920+
};
2921+
}
2922+
}
2923+
2924+
if component_start < bytes.len() {
2925+
let to_hash = &bytes[component_start..];
2926+
h.write(to_hash);
2927+
bytes_hashed += to_hash.len();
28782928
}
2929+
2930+
h.write_usize(bytes_hashed);
28792931
}
28802932
}
28812933

library/std/src/path/tests.rs

+78-2
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
use super::*;
22

3-
use crate::collections::BTreeSet;
3+
use crate::collections::hash_map::DefaultHasher;
4+
use crate::collections::{BTreeSet, HashSet};
5+
use crate::hash::Hasher;
46
use crate::rc::Rc;
57
use crate::sync::Arc;
68
use core::hint::black_box;
@@ -1632,7 +1634,25 @@ fn into_rc() {
16321634
fn test_ord() {
16331635
macro_rules! ord(
16341636
($ord:ident, $left:expr, $right:expr) => ( {
1635-
assert_eq!(Path::new($left).cmp(&Path::new($right)), core::cmp::Ordering::$ord);
1637+
use core::cmp::Ordering;
1638+
1639+
let left = Path::new($left);
1640+
let right = Path::new($right);
1641+
assert_eq!(left.cmp(&right), Ordering::$ord);
1642+
if (core::cmp::Ordering::$ord == Ordering::Equal) {
1643+
assert_eq!(left, right);
1644+
1645+
let mut hasher = DefaultHasher::new();
1646+
left.hash(&mut hasher);
1647+
let left_hash = hasher.finish();
1648+
hasher = DefaultHasher::new();
1649+
right.hash(&mut hasher);
1650+
let right_hash = hasher.finish();
1651+
1652+
assert_eq!(left_hash, right_hash, "hashes for {:?} and {:?} must match", left, right);
1653+
} else {
1654+
assert_ne!(left, right);
1655+
}
16361656
});
16371657
);
16381658

@@ -1693,3 +1713,59 @@ fn bench_path_cmp_fast_path_short(b: &mut test::Bencher) {
16931713
set.insert(paths[500].as_path());
16941714
});
16951715
}
1716+
1717+
#[bench]
1718+
fn bench_path_hashset(b: &mut test::Bencher) {
1719+
let prefix = "/my/home/is/my/castle/and/my/castle/has/a/rusty/workbench/";
1720+
let paths: Vec<_> =
1721+
(0..1000).map(|num| PathBuf::from(prefix).join(format!("file {}.rs", num))).collect();
1722+
1723+
let mut set = HashSet::new();
1724+
1725+
paths.iter().for_each(|p| {
1726+
set.insert(p.as_path());
1727+
});
1728+
1729+
b.iter(|| {
1730+
set.remove(paths[500].as_path());
1731+
set.insert(black_box(paths[500].as_path()))
1732+
});
1733+
}
1734+
1735+
#[bench]
1736+
fn bench_path_hashset_miss(b: &mut test::Bencher) {
1737+
let prefix = "/my/home/is/my/castle/and/my/castle/has/a/rusty/workbench/";
1738+
let paths: Vec<_> =
1739+
(0..1000).map(|num| PathBuf::from(prefix).join(format!("file {}.rs", num))).collect();
1740+
1741+
let mut set = HashSet::new();
1742+
1743+
paths.iter().for_each(|p| {
1744+
set.insert(p.as_path());
1745+
});
1746+
1747+
let probe = PathBuf::from(prefix).join("other");
1748+
1749+
b.iter(|| set.remove(black_box(probe.as_path())));
1750+
}
1751+
1752+
#[bench]
1753+
fn bench_hash_path_short(b: &mut test::Bencher) {
1754+
let mut hasher = DefaultHasher::new();
1755+
let path = Path::new("explorer.exe");
1756+
1757+
b.iter(|| black_box(path).hash(&mut hasher));
1758+
1759+
black_box(hasher.finish());
1760+
}
1761+
1762+
#[bench]
1763+
fn bench_hash_path_long(b: &mut test::Bencher) {
1764+
let mut hasher = DefaultHasher::new();
1765+
let path =
1766+
Path::new("/aaaaa/aaaaaa/./../aaaaaaaa/bbbbbbbbbbbbb/ccccccccccc/ddddddddd/eeeeeee.fff");
1767+
1768+
b.iter(|| black_box(path).hash(&mut hasher));
1769+
1770+
black_box(hasher.finish());
1771+
}

library/std/src/sys/unix/path.rs

+1
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ pub fn is_verbatim_sep(b: u8) -> bool {
1111
b == b'/'
1212
}
1313

14+
#[inline]
1415
pub fn parse_prefix(_: &OsStr) -> Option<Prefix<'_>> {
1516
None
1617
}

0 commit comments

Comments
 (0)