
Commit af0424e

Rollup merge of rust-lang#82191 - Soveu:dedup, r=nagisa
Vec::dedup_by optimization. `Vec::dedup_by` now drops duplicate items in-place as it walks through the vector. In my benchmarks it is around 10% faster when `T` is small, with no major regression otherwise. I used `ptr::copy` instead of a conditional `ptr::copy_nonoverlapping`, because the latter had some odd performance issues on my Ryzen laptop (it was 50% slower there than on an Intel/Sandy Bridge laptop). It would be good if someone could reproduce these results.
2 parents: 03abd47 + b0092bc
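
For intuition, here is a minimal sketch of the read/write scheme described above, in safe Rust and specialized to `u32` (the helper name `dedup_in_place` is made up for illustration). The real implementation in the diff below works for any `T`, uses raw pointers, drops duplicates with `ptr::drop_in_place`, and adds a drop guard so a panic in `same_bucket` cannot leak or double-drop elements:

// Sketch only: `read` scans every element, `write` is the length of the
// deduplicated prefix. Duplicates are discarded as soon as they are seen,
// instead of being shuffled to the tail and truncated afterwards.
fn dedup_in_place(v: &mut Vec<u32>) {
    let len = v.len();
    if len <= 1 {
        return;
    }
    let (mut read, mut write) = (1, 1);
    while read < len {
        if v[read] != v[write - 1] {
            // Keep the element: move it into the gap. `read == write` is
            // possible here, which is why the real code uses `ptr::copy`
            // rather than `ptr::copy_nonoverlapping`.
            v[write] = v[read];
            write += 1;
        }
        // A duplicate is simply left behind; for types with destructors,
        // the real code drops it in-place at this point.
        read += 1;
    }
    v.truncate(write);
}

fn main() {
    let mut v = vec![1, 1, 2, 3, 3, 3, 4];
    dedup_in_place(&mut v);
    assert_eq!(v, [1, 2, 3, 4]);
}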

File tree: 5 files changed, +306 −6 lines

library/alloc/benches/lib.rs

+1

@@ -4,6 +4,7 @@
 #![feature(btree_drain_filter)]
 #![feature(map_first_last)]
 #![feature(repr_simd)]
+#![feature(slice_partition_dedup)]
 #![feature(test)]
 
 extern crate test;

library/alloc/benches/vec.rs

+89

@@ -671,3 +671,92 @@ fn bench_map_fast(b: &mut Bencher) {
     let data = black_box([(0, 0); LEN]);
     b.iter(|| map_fast(&data));
 }
+
+fn random_sorted_fill(mut seed: u32, buf: &mut [u32]) {
+    let mask = if buf.len() < 8192 {
+        0xFF
+    } else if buf.len() < 200_000 {
+        0xFFFF
+    } else {
+        0xFFFF_FFFF
+    };
+
+    for item in buf.iter_mut() {
+        seed ^= seed << 13;
+        seed ^= seed >> 17;
+        seed ^= seed << 5;
+
+        *item = seed & mask;
+    }
+
+    buf.sort();
+}
+
+fn bench_vec_dedup_old(b: &mut Bencher, sz: usize) {
+    let mut template = vec![0u32; sz];
+    b.bytes = std::mem::size_of_val(template.as_slice()) as u64;
+    random_sorted_fill(0x43, &mut template);
+
+    let mut vec = template.clone();
+    b.iter(|| {
+        let len = {
+            let (dedup, _) = vec.partition_dedup();
+            dedup.len()
+        };
+        vec.truncate(len);
+
+        black_box(vec.first());
+        vec.clear();
+        vec.extend_from_slice(&template);
+    });
+}
+
+fn bench_vec_dedup_new(b: &mut Bencher, sz: usize) {
+    let mut template = vec![0u32; sz];
+    b.bytes = std::mem::size_of_val(template.as_slice()) as u64;
+    random_sorted_fill(0x43, &mut template);
+
+    let mut vec = template.clone();
+    b.iter(|| {
+        vec.dedup();
+        black_box(vec.first());
+        vec.clear();
+        vec.extend_from_slice(&template);
+    });
+}
+
+#[bench]
+fn bench_dedup_old_100(b: &mut Bencher) {
+    bench_vec_dedup_old(b, 100);
+}
+#[bench]
+fn bench_dedup_new_100(b: &mut Bencher) {
+    bench_vec_dedup_new(b, 100);
+}
+
+#[bench]
+fn bench_dedup_old_1000(b: &mut Bencher) {
+    bench_vec_dedup_old(b, 1000);
+}
+#[bench]
+fn bench_dedup_new_1000(b: &mut Bencher) {
+    bench_vec_dedup_new(b, 1000);
+}
+
+#[bench]
+fn bench_dedup_old_10000(b: &mut Bencher) {
+    bench_vec_dedup_old(b, 10000);
+}
+#[bench]
+fn bench_dedup_new_10000(b: &mut Bencher) {
+    bench_vec_dedup_new(b, 10000);
+}
+
+#[bench]
+fn bench_dedup_old_100000(b: &mut Bencher) {
+    bench_vec_dedup_old(b, 100000);
+}
+#[bench]
+fn bench_dedup_new_100000(b: &mut Bencher) {
+    bench_vec_dedup_new(b, 100000);
+}

library/alloc/src/vec/mod.rs

+89 −6

@@ -1512,15 +1512,98 @@ impl<T, A: Allocator> Vec<T, A> {
     /// assert_eq!(vec, ["foo", "bar", "baz", "bar"]);
     /// ```
     #[stable(feature = "dedup_by", since = "1.16.0")]
-    pub fn dedup_by<F>(&mut self, same_bucket: F)
+    pub fn dedup_by<F>(&mut self, mut same_bucket: F)
     where
         F: FnMut(&mut T, &mut T) -> bool,
     {
-        let len = {
-            let (dedup, _) = self.as_mut_slice().partition_dedup_by(same_bucket);
-            dedup.len()
-        };
-        self.truncate(len);
+        let len = self.len();
+        if len <= 1 {
+            return;
+        }
+
+        /* INVARIANT: vec.len() > read >= write > write-1 >= 0 */
+        struct FillGapOnDrop<'a, T, A: core::alloc::Allocator> {
+            /* Offset of the element we want to check if it is a duplicate */
+            read: usize,
+
+            /* Offset of the place where we want to place the non-duplicate
+             * when we find it. */
+            write: usize,
+
+            /* The Vec that would need correction if `same_bucket` panicked */
+            vec: &'a mut Vec<T, A>,
+        }
+
+        impl<'a, T, A: core::alloc::Allocator> Drop for FillGapOnDrop<'a, T, A> {
+            fn drop(&mut self) {
+                /* This code gets executed when `same_bucket` panics */
+
+                /* SAFETY: invariant guarantees that `read - write`
+                 * and `len - read` never overflow and that the copy is always
+                 * in-bounds. */
+                unsafe {
+                    let ptr = self.vec.as_mut_ptr();
+                    let len = self.vec.len();
+
+                    /* How many items were left when `same_bucket` panicked.
+                     * Basically vec[read..].len() */
+                    let items_left = len.wrapping_sub(self.read);
+
+                    /* Pointer to the first item in the vec[write..write+items_left] slice */
+                    let dropped_ptr = ptr.add(self.write);
+                    /* Pointer to the first item in the vec[read..] slice */
+                    let valid_ptr = ptr.add(self.read);
+
+                    /* Copy `vec[read..]` to `vec[write..write+items_left]`.
+                     * The slices can overlap, so `copy_nonoverlapping` cannot be used */
+                    ptr::copy(valid_ptr, dropped_ptr, items_left);
+
+                    /* How many items have already been dropped.
+                     * Basically vec[read..write].len() */
+                    let dropped = self.read.wrapping_sub(self.write);
+
+                    self.vec.set_len(len - dropped);
+                }
+            }
+        }
+
+        let mut gap = FillGapOnDrop { read: 1, write: 1, vec: self };
+        let ptr = gap.vec.as_mut_ptr();
+
+        /* Drop items while going through the Vec; it should be more efficient
+         * than doing slice partition_dedup + truncate */
+
+        /* SAFETY: Because of the invariant, read_ptr, prev_ptr and write_ptr
+         * are always in-bounds and read_ptr never aliases prev_ptr */
+        unsafe {
+            while gap.read < len {
+                let read_ptr = ptr.add(gap.read);
+                let prev_ptr = ptr.add(gap.write.wrapping_sub(1));
+
+                if same_bucket(&mut *read_ptr, &mut *prev_ptr) {
+                    /* We have found a duplicate, drop it in-place */
+                    ptr::drop_in_place(read_ptr);
+                } else {
+                    let write_ptr = ptr.add(gap.write);
+
+                    /* Because `read_ptr` can be equal to `write_ptr`, we either
+                     * have to use `copy` or a conditional `copy_nonoverlapping`.
+                     * It looks like the first option is faster. */
+                    ptr::copy(read_ptr, write_ptr, 1);
+
+                    /* We have filled that place, so go further */
+                    gap.write += 1;
+                }
+
+                gap.read += 1;
+            }
+
+            /* Technically we could let `gap` clean up with its Drop, but
+             * when `same_bucket` is guaranteed not to panic, this bloats
+             * the codegen a little, so we just do it manually */
+            gap.vec.set_len(gap.write);
+            mem::forget(gap);
+        }
     }
 
     /// Appends an element to the back of a collection.

library/alloc/tests/lib.rs

+1

@@ -19,6 +19,7 @@
 #![feature(int_bits_const)]
 #![feature(vecdeque_binary_search)]
 #![feature(slice_group_by)]
+#![feature(slice_partition_dedup)]
 #![feature(vec_extend_from_within)]
 #![feature(vec_spare_capacity)]

library/alloc/tests/vec.rs

+126

@@ -2102,6 +2102,132 @@ fn test_extend_from_within() {
     assert_eq!(v, ["a", "b", "c", "b", "c", "a", "b"]);
 }
 
+#[test]
+fn test_vec_dedup_by() {
+    let mut vec: Vec<i32> = vec![1, -1, 2, 3, 1, -5, 5, -2, 2];
+
+    vec.dedup_by(|a, b| a.abs() == b.abs());
+
+    assert_eq!(vec, [1, 2, 3, 1, -5, -2]);
+}
+
+#[test]
+fn test_vec_dedup_empty() {
+    let mut vec: Vec<i32> = Vec::new();
+
+    vec.dedup();
+
+    assert_eq!(vec, []);
+}
+
+#[test]
+fn test_vec_dedup_one() {
+    let mut vec = vec![12i32];
+
+    vec.dedup();
+
+    assert_eq!(vec, [12]);
+}
+
+#[test]
+fn test_vec_dedup_multiple_ident() {
+    let mut vec = vec![12, 12, 12, 12, 12, 11, 11, 11, 11, 11, 11];
+
+    vec.dedup();
+
+    assert_eq!(vec, [12, 11]);
+}
+
+#[test]
+fn test_vec_dedup_partialeq() {
+    #[derive(Debug)]
+    struct Foo(i32, i32);
+
+    impl PartialEq for Foo {
+        fn eq(&self, other: &Foo) -> bool {
+            self.0 == other.0
+        }
+    }
+
+    let mut vec = vec![Foo(0, 1), Foo(0, 5), Foo(1, 7), Foo(1, 9)];
+
+    vec.dedup();
+    assert_eq!(vec, [Foo(0, 1), Foo(1, 7)]);
+}
+
+#[test]
+fn test_vec_dedup() {
+    let mut vec: Vec<bool> = Vec::with_capacity(8);
+    let mut template = vec.clone();
+
+    for x in 0u8..255u8 {
+        vec.clear();
+        template.clear();
+
+        let iter = (0..8).map(move |bit| (x >> bit) & 1 == 1);
+        vec.extend(iter);
+        template.extend_from_slice(&vec);
+
+        let (dedup, _) = template.partition_dedup();
+        vec.dedup();
+
+        assert_eq!(vec, dedup);
+    }
+}
+
+#[test]
+fn test_vec_dedup_panicking() {
+    #[derive(Debug)]
+    struct Panic {
+        drop_counter: &'static AtomicU32,
+        value: bool,
+        index: usize,
+    }
+
+    impl PartialEq for Panic {
+        fn eq(&self, other: &Self) -> bool {
+            self.value == other.value
+        }
+    }
+
+    impl Drop for Panic {
+        fn drop(&mut self) {
+            let x = self.drop_counter.fetch_add(1, Ordering::SeqCst);
+            assert!(x != 4);
+        }
+    }
+
+    static DROP_COUNTER: AtomicU32 = AtomicU32::new(0);
+    let expected = [
+        Panic { drop_counter: &DROP_COUNTER, value: false, index: 0 },
+        Panic { drop_counter: &DROP_COUNTER, value: false, index: 5 },
+        Panic { drop_counter: &DROP_COUNTER, value: true, index: 6 },
+        Panic { drop_counter: &DROP_COUNTER, value: true, index: 7 },
+    ];
+    let mut vec = vec![
+        Panic { drop_counter: &DROP_COUNTER, value: false, index: 0 },
+        // these elements get deduplicated
+        Panic { drop_counter: &DROP_COUNTER, value: false, index: 1 },
+        Panic { drop_counter: &DROP_COUNTER, value: false, index: 2 },
+        Panic { drop_counter: &DROP_COUNTER, value: false, index: 3 },
+        Panic { drop_counter: &DROP_COUNTER, value: false, index: 4 },
+        // here it panics
+        Panic { drop_counter: &DROP_COUNTER, value: false, index: 5 },
+        Panic { drop_counter: &DROP_COUNTER, value: true, index: 6 },
+        Panic { drop_counter: &DROP_COUNTER, value: true, index: 7 },
+    ];
+
+    let _ = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
+        vec.dedup();
+    }));
+
+    let ok = vec.iter().zip(expected.iter()).all(|(x, y)| x.index == y.index);
+
+    if !ok {
+        panic!("expected: {:?}\ngot: {:?}\n", expected, vec);
+    }
+}
+
 // Regression test for issue #82533
 #[test]
 fn test_extend_from_within_panicing_clone() {
