Skip to content

Commit 0db48a7

Browse files
committed
Auto merge of rust-lang#120050 - scottmcm:vec-resize-memset, r=<try>
`Vec::resize` for bytes should be a single `memset` Really I just started by trying to see if specializing `iter::repeat_n` would help the perf issue that kept me from removing `Vec::extend_with` last time I tried, but I noticed in the process that a resize for bytes doesn't set all the new space with a single `memset`: <https://play.rust-lang.org/?version=nightly&mode=release&edition=2021&gist=35175ec844b46fcd95e2d0aad526859e> So using `repeat_n` to implement it -- like `VecDeque` uses, with the specialization for `next` to avoid a branch -- means that the optimizer notices the resize can set all the values with a single memset.
2 parents f45fe57 + 8652062 commit 0db48a7

File tree

4 files changed

+46
-44
lines changed

4 files changed

+46
-44
lines changed

Diff for: library/alloc/src/vec/mod.rs

+1-33
Original file line numberDiff line numberDiff line change
@@ -2448,7 +2448,7 @@ impl<T: Clone, A: Allocator> Vec<T, A> {
24482448
let len = self.len();
24492449

24502450
if new_len > len {
2451-
self.extend_with(new_len - len, value)
2451+
self.extend_trusted(core::iter::repeat_n(value, new_len - len));
24522452
} else {
24532453
self.truncate(new_len);
24542454
}
@@ -2562,38 +2562,6 @@ impl<T, A: Allocator, const N: usize> Vec<[T; N], A> {
25622562
}
25632563
}
25642564

2565-
impl<T: Clone, A: Allocator> Vec<T, A> {
2566-
#[cfg(not(no_global_oom_handling))]
2567-
/// Extend the vector by `n` clones of value.
2568-
fn extend_with(&mut self, n: usize, value: T) {
2569-
self.reserve(n);
2570-
2571-
unsafe {
2572-
let mut ptr = self.as_mut_ptr().add(self.len());
2573-
// Use SetLenOnDrop to work around bug where compiler
2574-
// might not realize the store through `ptr` through self.set_len()
2575-
// don't alias.
2576-
let mut local_len = SetLenOnDrop::new(&mut self.len);
2577-
2578-
// Write all elements except the last one
2579-
for _ in 1..n {
2580-
ptr::write(ptr, value.clone());
2581-
ptr = ptr.add(1);
2582-
// Increment the length in every step in case clone() panics
2583-
local_len.increment_len(1);
2584-
}
2585-
2586-
if n > 0 {
2587-
// We can write the last element directly without cloning needlessly
2588-
ptr::write(ptr, value);
2589-
local_len.increment_len(1);
2590-
}
2591-
2592-
// len set by scope guard
2593-
}
2594-
}
2595-
}
2596-
25972565
impl<T: PartialEq, A: Allocator> Vec<T, A> {
25982566
/// Removes consecutive repeated elements in the vector according to the
25992567
/// [`PartialEq`] trait implementation.

Diff for: library/alloc/src/vec/spec_from_elem.rs

+2-2
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ pub(super) trait SpecFromElem: Sized {
1313
impl<T: Clone> SpecFromElem for T {
1414
default fn from_elem<A: Allocator>(elem: Self, n: usize, alloc: A) -> Vec<Self, A> {
1515
let mut v = Vec::with_capacity_in(n, alloc);
16-
v.extend_with(n, elem);
16+
v.extend_trusted(core::iter::repeat_n(elem, n));
1717
v
1818
}
1919
}
@@ -25,7 +25,7 @@ impl<T: Clone + IsZero> SpecFromElem for T {
2525
return Vec { buf: RawVec::with_capacity_zeroed_in(n, alloc), len: n };
2626
}
2727
let mut v = Vec::with_capacity_in(n, alloc);
28-
v.extend_with(n, elem);
28+
v.extend_trusted(core::iter::repeat_n(elem, n));
2929
v
3030
}
3131
}

Diff for: library/core/src/iter/sources/repeat_n.rs

+30-9
Original file line numberDiff line numberDiff line change
@@ -110,6 +110,34 @@ impl<A> Drop for RepeatN<A> {
110110
}
111111
}
112112

113+
trait SpecRepeatN<A> {
114+
unsafe fn spec_next_unchecked(&mut self) -> A;
115+
}
116+
117+
impl<A: Clone> SpecRepeatN<A> for RepeatN<A> {
118+
default unsafe fn spec_next_unchecked(&mut self) -> A {
119+
self.count -= 1;
120+
121+
if self.count == 0 {
122+
// SAFETY: we just lowered the count to zero so it won't be dropped
123+
// later, and thus it's okay to take it here.
124+
unsafe { ManuallyDrop::take(&mut self.element) }
125+
} else {
126+
A::clone(&self.element)
127+
}
128+
}
129+
}
130+
131+
impl<A: Copy> SpecRepeatN<A> for RepeatN<A> {
132+
unsafe fn spec_next_unchecked(&mut self) -> A {
133+
self.count -= 1;
134+
135+
// For `Copy` types, we can always just read the item directly,
136+
// so skip having a branch that would need to be optimized out.
137+
*self.element
138+
}
139+
}
140+
113141
#[unstable(feature = "iter_repeat_n", issue = "104434")]
114142
impl<A: Clone> Iterator for RepeatN<A> {
115143
type Item = A;
@@ -120,15 +148,8 @@ impl<A: Clone> Iterator for RepeatN<A> {
120148
return None;
121149
}
122150

123-
self.count -= 1;
124-
Some(if self.count == 0 {
125-
// SAFETY: the check above ensured that the count used to be non-zero,
126-
// so element hasn't been dropped yet, and we just lowered the count to
127-
// zero so it won't be dropped later, and thus it's okay to take it here.
128-
unsafe { ManuallyDrop::take(&mut self.element) }
129-
} else {
130-
A::clone(&self.element)
131-
})
151+
// SAFETY: Just confirmed above that the iterator is non-empty
152+
unsafe { Some(self.spec_next_unchecked()) }
132153
}
133154

134155
#[inline]

Diff for: tests/codegen/vec-resize-memset.rs

+13
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
// compile-flags: -O
2+
// no-system-llvm
3+
// only-64bit
4+
// ignore-debug (the extra assertions get in the way)
5+
6+
#![crate_type = "lib"]
7+
8+
pub fn resize_with_bytes_is_one_memset(x: &mut Vec<u8>) {
9+
let new_len = x.len() + 456789;
10+
x.resize(new_len, 123);
11+
}
12+
13+
// CHECK: call void @llvm.memset.p0.i64({{.+}}, i8 123, i64 456789, i1 false)

0 commit comments

Comments
 (0)