Skip to content

Commit d5b6b95

Browse files
committed
Auto merge of rust-lang#52553 - Pazzaz:vecdeque-append, r=SimonSapin
Non-naive implementation of `VecDeque.append` Replaces the old, simple implementation with a more manual (and **unsafe** 😱) one. I've added 1 more test and verified that it covers all 6 code paths in the function. This new implementation was about 60% faster than the old naive one when I tried benchmarking it.
2 parents 6b1ff19 + b063bd4 commit d5b6b95

File tree

4 files changed

+313
-2
lines changed

4 files changed

+313
-2
lines changed

src/liballoc/Cargo.toml

+5
Original file line numberDiff line numberDiff line change
@@ -23,3 +23,8 @@ path = "../liballoc/tests/lib.rs"
2323
[[bench]]
2424
name = "collectionsbenches"
2525
path = "../liballoc/benches/lib.rs"
26+
27+
[[bench]]
28+
name = "vec_deque_append_bench"
29+
path = "../liballoc/benches/vec_deque_append.rs"
30+
harness = false
+48
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
// Copyright 2018 The Rust Project Developers. See the COPYRIGHT
2+
// file at the top-level directory of this distribution and at
3+
// http://rust-lang.org/COPYRIGHT.
4+
//
5+
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
6+
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
7+
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
8+
// option. This file may not be copied, modified, or distributed
9+
// except according to those terms.
10+
11+
#![feature(duration_as_u128)]
12+
use std::{collections::VecDeque, time::Instant};
13+
14+
const VECDEQUE_LEN: i32 = 100000;
15+
const WARMUP_N: usize = 100;
16+
const BENCH_N: usize = 1000;
17+
18+
fn main() {
19+
let a: VecDeque<i32> = (0..VECDEQUE_LEN).collect();
20+
let b: VecDeque<i32> = (0..VECDEQUE_LEN).collect();
21+
22+
for _ in 0..WARMUP_N {
23+
let mut c = a.clone();
24+
let mut d = b.clone();
25+
c.append(&mut d);
26+
}
27+
28+
let mut durations = Vec::with_capacity(BENCH_N);
29+
30+
for _ in 0..BENCH_N {
31+
let mut c = a.clone();
32+
let mut d = b.clone();
33+
let before = Instant::now();
34+
c.append(&mut d);
35+
let after = Instant::now();
36+
durations.push(after.duration_since(before));
37+
}
38+
39+
let l = durations.len();
40+
durations.sort();
41+
42+
assert!(BENCH_N % 2 == 0);
43+
let median = (durations[(l / 2) - 1] + durations[l / 2]) / 2;
44+
println!(
45+
"\ncustom-bench vec_deque_append {:?} ns/iter\n",
46+
median.as_nanos()
47+
);
48+
}

src/liballoc/collections/vec_deque.rs

+159-2
Original file line numberDiff line numberDiff line change
@@ -202,6 +202,23 @@ impl<T> VecDeque<T> {
202202
len);
203203
}
204204

205+
/// Returns a pair of slices which contain the contents of the buffer not used by the VecDeque.
206+
#[inline]
207+
unsafe fn unused_as_mut_slices<'a>(&'a mut self) -> (&'a mut [T], &'a mut [T]) {
208+
let head = self.head;
209+
let tail = self.tail;
210+
let buf = self.buffer_as_mut_slice();
211+
if head != tail {
212+
// In buf, head..tail contains the VecDeque and tail..head is unused.
213+
// So calling `ring_slices` with tail and head swapped returns unused slices.
214+
RingSlices::ring_slices(buf, tail, head)
215+
} else {
216+
// Swapping doesn't help when head == tail.
217+
let (before, after) = buf.split_at_mut(head);
218+
(after, before)
219+
}
220+
}
221+
205222
/// Copies a potentially wrapping block of memory len long from src to dest.
206223
/// (abs(dst - src) + len) must be no larger than cap() (There must be at
207224
/// most one continuous overlapping region between src and dest).
@@ -1834,8 +1851,148 @@ impl<T> VecDeque<T> {
18341851
#[inline]
18351852
#[stable(feature = "append", since = "1.4.0")]
18361853
pub fn append(&mut self, other: &mut Self) {
1837-
// naive impl
1838-
self.extend(other.drain(..));
1854+
// Copies all values from `src_slice` to the start of `dst_slice`.
1855+
unsafe fn copy_whole_slice<T>(src_slice: &[T], dst_slice: &mut [T]) {
1856+
let len = src_slice.len();
1857+
ptr::copy_nonoverlapping(src_slice.as_ptr(), dst_slice[..len].as_mut_ptr(), len);
1858+
}
1859+
1860+
let src_total = other.len();
1861+
1862+
// Guarantees there is space in `self` for `other`.
1863+
self.reserve(src_total);
1864+
1865+
self.head = {
1866+
let original_head = self.head;
1867+
1868+
// The goal is to copy all values from `other` into `self`. To avoid any
1869+
// mismatch, all valid values in `other` are retrieved...
1870+
let (src_high, src_low) = other.as_slices();
1871+
// and unoccupied parts of self are retrieved.
1872+
let (dst_high, dst_low) = unsafe { self.unused_as_mut_slices() };
1873+
1874+
// Then all that is needed is to copy all values from
1875+
// src (src_high and src_low) to dst (dst_high and dst_low).
1876+
//
1877+
// other [o o o . . . . . o o o o]
1878+
// [5 6 7] [1 2 3 4]
1879+
// src_low src_high
1880+
//
1881+
// self [. . . . . . o o o o . .]
1882+
// [3 4 5 6 7 .] [1 2]
1883+
// dst_low dst_high
1884+
//
1885+
// Values are not copied one by one but as slices in `copy_whole_slice`.
1886+
// What slices are used depends on various properties of src and dst.
1887+
// There are 6 cases in total:
1888+
// 1. `src` is contiguous and fits in dst_high
1889+
// 2. `src` is contiguous and does not fit in dst_high
1890+
// 3. `src` is discontiguous and fits in dst_high
1891+
// 4. `src` is discontiguous and does not fit in dst_high
1892+
// + src_high is smaller than dst_high
1893+
// 5. `src` is discontiguous and does not fit in dst_high
1894+
// + dst_high is smaller than src_high
1895+
// 6. `src` is discontiguous and does not fit in dst_high
1896+
// + dst_high is the same size as src_high
1897+
let src_contiguous = src_low.is_empty();
1898+
let dst_high_fits_src = dst_high.len() >= src_total;
1899+
match (src_contiguous, dst_high_fits_src) {
1900+
(true, true) => {
1901+
// 1.
1902+
// other [. . . o o o . . . . . .]
1903+
// [] [1 1 1]
1904+
//
1905+
// self [. o o o o o . . . . . .]
1906+
// [.] [1 1 1 . . .]
1907+
1908+
unsafe {
1909+
copy_whole_slice(src_high, dst_high);
1910+
}
1911+
original_head + src_total
1912+
}
1913+
(true, false) => {
1914+
// 2.
1915+
// other [. . . o o o o o . . . .]
1916+
// [] [1 1 2 2 2]
1917+
//
1918+
// self [. . . . . . . o o o . .]
1919+
// [2 2 2 . . . .] [1 1]
1920+
1921+
let (src_1, src_2) = src_high.split_at(dst_high.len());
1922+
unsafe {
1923+
copy_whole_slice(src_1, dst_high);
1924+
copy_whole_slice(src_2, dst_low);
1925+
}
1926+
src_total - dst_high.len()
1927+
}
1928+
(false, true) => {
1929+
// 3.
1930+
// other [o o . . . . . . . o o o]
1931+
// [2 2] [1 1 1]
1932+
//
1933+
// self [. o o . . . . . . . . .]
1934+
// [.] [1 1 1 2 2 . . . .]
1935+
1936+
let (dst_1, dst_2) = dst_high.split_at_mut(src_high.len());
1937+
unsafe {
1938+
copy_whole_slice(src_high, dst_1);
1939+
copy_whole_slice(src_low, dst_2);
1940+
}
1941+
original_head + src_total
1942+
}
1943+
(false, false) => {
1944+
if src_high.len() < dst_high.len() {
1945+
// 4.
1946+
// other [o o o . . . . . . o o o]
1947+
// [2 3 3] [1 1 1]
1948+
//
1949+
// self [. . . . . . o o . . . .]
1950+
// [3 3 . . . .] [1 1 1 2]
1951+
1952+
let (dst_1, dst_2) = dst_high.split_at_mut(src_high.len());
1953+
let (src_2, src_3) = src_low.split_at(dst_2.len());
1954+
unsafe {
1955+
copy_whole_slice(src_high, dst_1);
1956+
copy_whole_slice(src_2, dst_2);
1957+
copy_whole_slice(src_3, dst_low);
1958+
}
1959+
src_3.len()
1960+
} else if src_high.len() > dst_high.len() {
1961+
// 5.
1962+
// other [o o o . . . . . o o o o]
1963+
// [3 3 3] [1 1 2 2]
1964+
//
1965+
// self [. . . . . . o o o o . .]
1966+
// [2 2 3 3 3 .] [1 1]
1967+
1968+
let (src_1, src_2) = src_high.split_at(dst_high.len());
1969+
let (dst_2, dst_3) = dst_low.split_at_mut(src_2.len());
1970+
unsafe {
1971+
copy_whole_slice(src_1, dst_high);
1972+
copy_whole_slice(src_2, dst_2);
1973+
copy_whole_slice(src_low, dst_3);
1974+
}
1975+
dst_2.len() + src_low.len()
1976+
} else {
1977+
// 6.
1978+
// other [o o . . . . . . . o o o]
1979+
// [2 2] [1 1 1]
1980+
//
1981+
// self [. . . . . . . o o . . .]
1982+
// [2 2 . . . . .] [1 1 1]
1983+
1984+
unsafe {
1985+
copy_whole_slice(src_high, dst_high);
1986+
copy_whole_slice(src_low, dst_low);
1987+
}
1988+
src_low.len()
1989+
}
1990+
}
1991+
}
1992+
};
1993+
1994+
// Some values now exist in both `other` and `self` but are made inaccessible in `other`.
1995+
other.tail = other.head;
18391996
}
18401997

18411998
/// Retains only the elements specified by the predicate.

src/liballoc/tests/vec_deque.rs

+101
Original file line numberDiff line numberDiff line change
@@ -928,6 +928,107 @@ fn test_append() {
928928
assert_eq!(a.iter().cloned().collect::<Vec<_>>(), []);
929929
}
930930

931+
#[test]
932+
fn test_append_permutations() {
933+
fn construct_vec_deque(
934+
push_back: usize,
935+
pop_back: usize,
936+
push_front: usize,
937+
pop_front: usize,
938+
) -> VecDeque<usize> {
939+
let mut out = VecDeque::new();
940+
for a in 0..push_back {
941+
out.push_back(a);
942+
}
943+
for b in 0..push_front {
944+
out.push_front(push_back + b);
945+
}
946+
for _ in 0..pop_back {
947+
out.pop_back();
948+
}
949+
for _ in 0..pop_front {
950+
out.pop_front();
951+
}
952+
out
953+
}
954+
955+
const MAX: usize = 5;
956+
957+
// Many different permutations of both the `VecDeque` getting appended to
958+
// and the one getting appended are generated to check `append`.
959+
// This ensures all 6 code paths of `append` are tested.
960+
for src_push_back in 0..MAX {
961+
for src_push_front in 0..MAX {
962+
// doesn't pop more values than are pushed
963+
for src_pop_back in 0..(src_push_back + src_push_front) {
964+
for src_pop_front in 0..(src_push_back + src_push_front - src_pop_back) {
965+
966+
let src = construct_vec_deque(
967+
src_push_back,
968+
src_pop_back,
969+
src_push_front,
970+
src_pop_front,
971+
);
972+
973+
for dst_push_back in 0..MAX {
974+
for dst_push_front in 0..MAX {
975+
for dst_pop_back in 0..(dst_push_back + dst_push_front) {
976+
for dst_pop_front
977+
in 0..(dst_push_back + dst_push_front - dst_pop_back)
978+
{
979+
let mut dst = construct_vec_deque(
980+
dst_push_back,
981+
dst_pop_back,
982+
dst_push_front,
983+
dst_pop_front,
984+
);
985+
let mut src = src.clone();
986+
987+
// Assert that appending `src` to `dst` gives the same order
988+
// of values as iterating over both in sequence.
989+
let correct = dst
990+
.iter()
991+
.chain(src.iter())
992+
.cloned()
993+
.collect::<Vec<usize>>();
994+
dst.append(&mut src);
995+
assert_eq!(dst, correct);
996+
assert!(src.is_empty());
997+
}
998+
}
999+
}
1000+
}
1001+
}
1002+
}
1003+
}
1004+
}
1005+
}
1006+
1007+
struct DropCounter<'a> {
1008+
count: &'a mut u32,
1009+
}
1010+
1011+
impl<'a> Drop for DropCounter<'a> {
1012+
fn drop(&mut self) {
1013+
*self.count += 1;
1014+
}
1015+
}
1016+
1017+
#[test]
1018+
fn test_append_double_drop() {
1019+
let (mut count_a, mut count_b) = (0, 0);
1020+
{
1021+
let mut a = VecDeque::new();
1022+
let mut b = VecDeque::new();
1023+
a.push_back(DropCounter { count: &mut count_a });
1024+
b.push_back(DropCounter { count: &mut count_b });
1025+
1026+
a.append(&mut b);
1027+
}
1028+
assert_eq!(count_a, 1);
1029+
assert_eq!(count_b, 1);
1030+
}
1031+
9311032
#[test]
9321033
fn test_retain() {
9331034
let mut buf = VecDeque::new();

0 commit comments

Comments
 (0)