Skip to content

Commit f5c3755

Browse files
committed
Auto merge of #120050 - scottmcm:vec-resize-memset, r=<try>
`Vec::resize` for bytes should be a single `memset` Really I just started by trying to see if specializing `iter::repeat_n` would help the perf issue that kept me from removing `Vec::extend_with` last time I tried, but I noticed in the process that a resize for bytes doesn't set all the new space with a single `memset`: <https://play.rust-lang.org/?version=nightly&mode=release&edition=2021&gist=35175ec844b46fcd95e2d0aad526859e> So using `repeat_n` to implement it -- like `VecDeque` uses, with the specialization for `next` to avoid a branch -- means that the optimizer notices the resize can set all the values with a single memset.
2 parents 60c3673 + 1697af2 commit f5c3755

File tree

4 files changed

+86
-39
lines changed

4 files changed

+86
-39
lines changed

Diff for: library/alloc/src/vec/mod.rs

+34-22
Original file line numberDiff line numberDiff line change
@@ -145,6 +145,12 @@ use self::spec_extend::SpecExtend;
145145
#[cfg(not(no_global_oom_handling))]
146146
mod spec_extend;
147147

148+
#[cfg(not(no_global_oom_handling))]
149+
use self::spec_extend_elem::SpecExtendElem;
150+
151+
#[cfg(not(no_global_oom_handling))]
152+
mod spec_extend_elem;
153+
148154
/// A contiguous growable array type, written as `Vec<T>`, short for 'vector'.
149155
///
150156
/// # Examples
@@ -2871,7 +2877,7 @@ impl<T: Clone, A: Allocator> Vec<T, A> {
28712877
let len = self.len();
28722878

28732879
if new_len > len {
2874-
self.extend_with(new_len - len, value)
2880+
self.extend_elem(new_len - len, value)
28752881
} else {
28762882
self.truncate(new_len);
28772883
}
@@ -2985,32 +2991,38 @@ impl<T, A: Allocator, const N: usize> Vec<[T; N], A> {
29852991

29862992
impl<T: Clone, A: Allocator> Vec<T, A> {
29872993
#[cfg(not(no_global_oom_handling))]
2988-
/// Extend the vector by `n` clones of value.
2989-
fn extend_with(&mut self, n: usize, value: T) {
2994+
#[inline]
2995+
/// Extend the vector by `n` clones of `value`.
2996+
///
2997+
/// Uses specialization to dispatch to `extend_elem_copy` if possible.
2998+
fn extend_elem(&mut self, n: usize, value: T) {
2999+
self.spec_extend_elem(n, value);
3000+
}
3001+
3002+
#[cfg(not(no_global_oom_handling))]
3003+
#[inline]
3004+
/// Extend the vector by `n` *copies* of `value`.
3005+
fn extend_elem_copy(&mut self, n: usize, value: T)
3006+
where
3007+
T: Copy,
3008+
{
29903009
self.reserve(n);
29913010

3011+
// SAFETY: We now have space for all the elements, so the pointer math
3012+
// is all in-bounds. Because `T: Copy`, there's no user code being run
3013+
// here (no `clone` nor any `drop` in the assignment).
29923014
unsafe {
2993-
let mut ptr = self.as_mut_ptr().add(self.len());
2994-
// Use SetLenOnDrop to work around bug where compiler
2995-
// might not realize the store through `ptr` through self.set_len()
2996-
// don't alias.
2997-
let mut local_len = SetLenOnDrop::new(&mut self.len);
2998-
2999-
// Write all elements except the last one
3000-
for _ in 1..n {
3001-
ptr::write(ptr, value.clone());
3002-
ptr = ptr.add(1);
3003-
// Increment the length in every step in case clone() panics
3004-
local_len.increment_len(1);
3005-
}
3006-
3007-
if n > 0 {
3008-
// We can write the last element directly without cloning needlessly
3009-
ptr::write(ptr, value);
3010-
local_len.increment_len(1);
3015+
// Because there's no user code being run here, we can skip it for ZSTs.
3016+
// That helps tests in debug mode that do things like `vec![(); HUGE]`.
3017+
// See <https://github.com/rust-lang/rust/pull/118094>
3018+
if !T::IS_ZST {
3019+
let ptr = self.as_mut_ptr().add(self.len);
3020+
for i in 0..n {
3021+
*ptr.add(i) = value;
3022+
}
30113023
}
30123024

3013-
// len set by scope guard
3025+
self.len += n;
30143026
}
30153027
}
30163028
}

Diff for: library/alloc/src/vec/spec_extend_elem.rs

+27
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
use core::iter::repeat_n;
2+
3+
use super::Vec;
4+
use crate::alloc::Allocator;
5+
6+
// Specialization trait used for Vec::extend_elem
7+
pub(super) trait SpecExtendElem<T> {
8+
fn spec_extend_elem(&mut self, n: usize, value: T);
9+
}
10+
11+
impl<T, A: Allocator> SpecExtendElem<T> for Vec<T, A>
12+
where
13+
T: Clone,
14+
{
15+
default fn spec_extend_elem(&mut self, n: usize, value: T) {
16+
self.extend_trusted(repeat_n(value, n))
17+
}
18+
}
19+
20+
impl<T, A: Allocator> SpecExtendElem<T> for Vec<T, A>
21+
where
22+
T: Copy,
23+
{
24+
fn spec_extend_elem(&mut self, n: usize, value: T) {
25+
self.extend_elem_copy(n, value)
26+
}
27+
}

Diff for: library/alloc/src/vec/spec_from_elem.rs

+2-17
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ pub(super) trait SpecFromElem: Sized {
1212
impl<T: Clone> SpecFromElem for T {
1313
default fn from_elem<A: Allocator>(elem: Self, n: usize, alloc: A) -> Vec<Self, A> {
1414
let mut v = Vec::with_capacity_in(n, alloc);
15-
v.extend_with(n, elem);
15+
v.extend_elem(n, elem);
1616
v
1717
}
1818
}
@@ -24,7 +24,7 @@ impl<T: Clone + IsZero> SpecFromElem for T {
2424
return Vec { buf: RawVec::with_capacity_zeroed_in(n, alloc), len: n };
2525
}
2626
let mut v = Vec::with_capacity_in(n, alloc);
27-
v.extend_with(n, elem);
27+
v.extend_elem(n, elem);
2828
v
2929
}
3030
}
@@ -58,18 +58,3 @@ impl SpecFromElem for u8 {
5858
v
5959
}
6060
}
61-
62-
// A better way would be to implement this for all ZSTs which are `Copy` and have trivial `Clone`
63-
// but the latter cannot be detected currently
64-
impl SpecFromElem for () {
65-
#[inline]
66-
fn from_elem<A: Allocator>(_elem: (), n: usize, alloc: A) -> Vec<(), A> {
67-
let mut v = Vec::with_capacity_in(n, alloc);
68-
// SAFETY: the capacity has just been set to `n`
69-
// and `()` is a ZST with trivial `Clone` implementation
70-
unsafe {
71-
v.set_len(n);
72-
}
73-
v
74-
}
75-
}

Diff for: tests/codegen/vec-of-bytes-memset.rs

+23
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
//@ compile-flags: -O
2+
//@ only-64bit
3+
4+
#![crate_type = "lib"]
5+
6+
// CHECK-LABEL: @resize_bytes_is_one_memset
7+
#[no_mangle]
8+
pub fn resize_bytes_is_one_memset(x: &mut Vec<u8>) {
9+
// CHECK: call void @llvm.memset.p0.i64({{.+}}, i8 123, i64 456789, i1 false)
10+
let new_len = x.len() + 456789;
11+
x.resize(new_len, 123);
12+
}
13+
14+
#[derive(Copy, Clone)]
15+
struct ByteNewtype(i8);
16+
17+
// CHECK-LABEL: @from_elem_is_one_memset
18+
#[no_mangle]
19+
pub fn from_elem_is_one_memset() -> Vec<ByteNewtype> {
20+
// CHECK: %[[P:.+]] = tail call{{.+}}@__rust_alloc(i64 noundef 123456, i64 noundef 1)
21+
// CHECK: call void @llvm.memset.p0.i64({{.+}} %[[P]], i8 42, i64 123456, i1 false)
22+
vec![ByteNewtype(42); 123456]
23+
}

0 commit comments

Comments
 (0)