Skip to content

Commit 27371af

Browse files
committed
Auto merge of rust-lang#120050 - scottmcm:vec-resize-memset, r=<try>
`Vec::resize` for bytes should be a single `memset` Really I just started by trying to see if specializing `iter::repeat_n` would help the perf issue that kept me from removing `Vec::extend_with` last time I tried, but I noticed in the process that a resize for bytes doesn't set all the new space with a single `memset`: <https://play.rust-lang.org/?version=nightly&mode=release&edition=2021&gist=35175ec844b46fcd95e2d0aad526859e> So using `repeat_n` to implement it -- like `VecDeque` uses, with the specialization for `next` to avoid a branch -- means that the optimizer notices the resize can set all the values with a single memset.
2 parents 6ae4cfb + 6c32590 commit 27371af

File tree

5 files changed

+81
-47
lines changed

5 files changed

+81
-47
lines changed

Diff for: library/alloc/src/vec/mod.rs

+2-34
Original file line numberDiff line numberDiff line change
@@ -2448,7 +2448,7 @@ impl<T: Clone, A: Allocator> Vec<T, A> {
24482448
let len = self.len();
24492449

24502450
if new_len > len {
2451-
self.extend_with(new_len - len, value)
2451+
self.extend_trusted(core::iter::repeat_n(value, new_len - len));
24522452
} else {
24532453
self.truncate(new_len);
24542454
}
@@ -2562,38 +2562,6 @@ impl<T, A: Allocator, const N: usize> Vec<[T; N], A> {
25622562
}
25632563
}
25642564

2565-
impl<T: Clone, A: Allocator> Vec<T, A> {
2566-
#[cfg(not(no_global_oom_handling))]
2567-
/// Extend the vector by `n` clones of value.
2568-
fn extend_with(&mut self, n: usize, value: T) {
2569-
self.reserve(n);
2570-
2571-
unsafe {
2572-
let mut ptr = self.as_mut_ptr().add(self.len());
2573-
// Use SetLenOnDrop to work around bug where compiler
2574-
// might not realize the store through `ptr` through self.set_len()
2575-
// don't alias.
2576-
let mut local_len = SetLenOnDrop::new(&mut self.len);
2577-
2578-
// Write all elements except the last one
2579-
for _ in 1..n {
2580-
ptr::write(ptr, value.clone());
2581-
ptr = ptr.add(1);
2582-
// Increment the length in every step in case clone() panics
2583-
local_len.increment_len(1);
2584-
}
2585-
2586-
if n > 0 {
2587-
// We can write the last element directly without cloning needlessly
2588-
ptr::write(ptr, value);
2589-
local_len.increment_len(1);
2590-
}
2591-
2592-
// len set by scope guard
2593-
}
2594-
}
2595-
}
2596-
25972565
impl<T: PartialEq, A: Allocator> Vec<T, A> {
25982566
/// Removes consecutive repeated elements in the vector according to the
25992567
/// [`PartialEq`] trait implementation.
@@ -2919,7 +2887,7 @@ impl<T, A: Allocator> Vec<T, A> {
29192887
// Since the loop executes user code which can panic we have to update
29202888
// the length every step to correctly drop what we've written.
29212889
// NB can't overflow since we would have had to alloc the address space
2922-
local_len.increment_len(1);
2890+
local_len.increment_len_unchecked(1);
29232891
});
29242892
}
29252893
} else {

Diff for: library/alloc/src/vec/set_len_on_drop.rs

+5-2
Original file line numberDiff line numberDiff line change
@@ -14,9 +14,12 @@ impl<'a> SetLenOnDrop<'a> {
1414
SetLenOnDrop { local_len: *len, len }
1515
}
1616

17+
/// # Safety
18+
/// `self.current_len() + increment` must not overflow.
1719
#[inline]
18-
pub(super) fn increment_len(&mut self, increment: usize) {
19-
self.local_len += increment;
20+
pub(super) unsafe fn increment_len_unchecked(&mut self, increment: usize) {
21+
// SAFETY: This is our precondition
22+
self.local_len = unsafe { self.local_len.unchecked_add(increment) };
2023
}
2124

2225
#[inline]

Diff for: library/alloc/src/vec/spec_from_elem.rs

+2-2
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ pub(super) trait SpecFromElem: Sized {
1313
impl<T: Clone> SpecFromElem for T {
1414
default fn from_elem<A: Allocator>(elem: Self, n: usize, alloc: A) -> Vec<Self, A> {
1515
let mut v = Vec::with_capacity_in(n, alloc);
16-
v.extend_with(n, elem);
16+
v.extend_trusted(core::iter::repeat_n(elem, n));
1717
v
1818
}
1919
}
@@ -25,7 +25,7 @@ impl<T: Clone + IsZero> SpecFromElem for T {
2525
return Vec { buf: RawVec::with_capacity_zeroed_in(n, alloc), len: n };
2626
}
2727
let mut v = Vec::with_capacity_in(n, alloc);
28-
v.extend_with(n, elem);
28+
v.extend_trusted(core::iter::repeat_n(elem, n));
2929
v
3030
}
3131
}

Diff for: library/core/src/iter/sources/repeat_n.rs

+59-9
Original file line numberDiff line numberDiff line change
@@ -110,6 +110,58 @@ impl<A> Drop for RepeatN<A> {
110110
}
111111
}
112112

113+
trait SpecRepeatN<A> {
114+
unsafe fn spec_next_unchecked(&mut self) -> A;
115+
fn spec_fold<B, F: FnMut(B, A) -> B>(self, b: B, f: F) -> B;
116+
}
117+
118+
impl<A: Clone> SpecRepeatN<A> for RepeatN<A> {
119+
default unsafe fn spec_next_unchecked(&mut self) -> A {
120+
self.count -= 1;
121+
122+
if self.count == 0 {
123+
// SAFETY: we just lowered the count to zero so it won't be dropped
124+
// later, and thus it's okay to take it here.
125+
unsafe { ManuallyDrop::take(&mut self.element) }
126+
} else {
127+
A::clone(&self.element)
128+
}
129+
}
130+
131+
default fn spec_fold<B, F: FnMut(B, A) -> B>(mut self, mut b: B, mut f: F) -> B {
132+
let mut count = self.count;
133+
if let Some(element) = self.take_element() {
134+
while count > 1 {
135+
count -= 1;
136+
b = f(b, element.clone());
137+
}
138+
b = f(b, element);
139+
}
140+
b
141+
}
142+
}
143+
144+
impl<A: Copy> SpecRepeatN<A> for RepeatN<A> {
145+
unsafe fn spec_next_unchecked(&mut self) -> A {
146+
self.count -= 1;
147+
148+
// For `Copy` types, we can always just read the item directly,
149+
// so skip having a branch that would need to be optimized out.
150+
*self.element
151+
}
152+
153+
fn spec_fold<B, F: FnMut(B, A) -> B>(self, mut b: B, mut f: F) -> B {
154+
let mut count = self.count;
155+
let element = *self.element;
156+
157+
while count > 0 {
158+
count -= 1;
159+
b = f(b, element);
160+
}
161+
b
162+
}
163+
}
164+
113165
#[unstable(feature = "iter_repeat_n", issue = "104434")]
114166
impl<A: Clone> Iterator for RepeatN<A> {
115167
type Item = A;
@@ -120,15 +172,8 @@ impl<A: Clone> Iterator for RepeatN<A> {
120172
return None;
121173
}
122174

123-
self.count -= 1;
124-
Some(if self.count == 0 {
125-
// SAFETY: the check above ensured that the count used to be non-zero,
126-
// so element hasn't been dropped yet, and we just lowered the count to
127-
// zero so it won't be dropped later, and thus it's okay to take it here.
128-
unsafe { ManuallyDrop::take(&mut self.element) }
129-
} else {
130-
A::clone(&self.element)
131-
})
175+
// SAFETY: Just confirmed above that the iterator is non-empty
176+
unsafe { Some(self.spec_next_unchecked()) }
132177
}
133178

134179
#[inline]
@@ -154,6 +199,11 @@ impl<A: Clone> Iterator for RepeatN<A> {
154199
}
155200
}
156201

202+
#[inline]
203+
fn fold<B, F: FnMut(B, A) -> B>(self, init: B, f: F) -> B {
204+
self.spec_fold(init, f)
205+
}
206+
157207
#[inline]
158208
fn last(mut self) -> Option<A> {
159209
self.take_element()

Diff for: tests/codegen/vec-resize-memset.rs

+13
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
// compile-flags: -O
2+
// no-system-llvm
3+
// only-64bit
4+
// ignore-debug (the extra assertions get in the way)
5+
6+
#![crate_type = "lib"]
7+
8+
pub fn resize_with_bytes_is_one_memset(x: &mut Vec<u8>) {
9+
let new_len = x.len() + 456789;
10+
x.resize(new_len, 123);
11+
}
12+
13+
// CHECK: call void @llvm.memset.p0.i64({{.+}}, i8 123, i64 456789, i1 false)

0 commit comments

Comments
 (0)