Skip to content

Commit

Permalink
Auto merge of #120050 - scottmcm:vec-resize-memset, r=<try>
Browse files Browse the repository at this point in the history
`Vec::resize` for bytes should be a single `memset`

Really I just started by trying to see if specializing `iter::repeat_n` would help the perf issue that kept me from removing `Vec::extend_with` last time I tried, but I noticed in the process that a resize for bytes doesn't set all the new space with a single `memset`: <https://play.rust-lang.org/?version=nightly&mode=release&edition=2021&gist=35175ec844b46fcd95e2d0aad526859e>

So using `repeat_n` to implement it -- like `VecDeque` uses, with the specialization for `next` to avoid a branch -- means that the optimizer notices the resize can set all the values with a single memset.
  • Loading branch information
bors committed Jan 17, 2024
2 parents f45fe57 + 8652062 commit 0db48a7
Show file tree
Hide file tree
Showing 4 changed files with 46 additions and 44 deletions.
34 changes: 1 addition & 33 deletions library/alloc/src/vec/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2448,7 +2448,7 @@ impl<T: Clone, A: Allocator> Vec<T, A> {
let len = self.len();

if new_len > len {
self.extend_with(new_len - len, value)
self.extend_trusted(core::iter::repeat_n(value, new_len - len));
} else {
self.truncate(new_len);
}
Expand Down Expand Up @@ -2562,38 +2562,6 @@ impl<T, A: Allocator, const N: usize> Vec<[T; N], A> {
}
}

impl<T: Clone, A: Allocator> Vec<T, A> {
#[cfg(not(no_global_oom_handling))]
/// Extend the vector by `n` clones of value.
fn extend_with(&mut self, n: usize, value: T) {
self.reserve(n);

unsafe {
let mut ptr = self.as_mut_ptr().add(self.len());
// Use SetLenOnDrop to work around bug where compiler
// might not realize the store through `ptr` through self.set_len()
// don't alias.
let mut local_len = SetLenOnDrop::new(&mut self.len);

// Write all elements except the last one
for _ in 1..n {
ptr::write(ptr, value.clone());
ptr = ptr.add(1);
// Increment the length in every step in case clone() panics
local_len.increment_len(1);
}

if n > 0 {
// We can write the last element directly without cloning needlessly
ptr::write(ptr, value);
local_len.increment_len(1);
}

// len set by scope guard
}
}
}

impl<T: PartialEq, A: Allocator> Vec<T, A> {
/// Removes consecutive repeated elements in the vector according to the
/// [`PartialEq`] trait implementation.
Expand Down
4 changes: 2 additions & 2 deletions library/alloc/src/vec/spec_from_elem.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ pub(super) trait SpecFromElem: Sized {
impl<T: Clone> SpecFromElem for T {
default fn from_elem<A: Allocator>(elem: Self, n: usize, alloc: A) -> Vec<Self, A> {
let mut v = Vec::with_capacity_in(n, alloc);
v.extend_with(n, elem);
v.extend_trusted(core::iter::repeat_n(elem, n));
v
}
}
Expand All @@ -25,7 +25,7 @@ impl<T: Clone + IsZero> SpecFromElem for T {
return Vec { buf: RawVec::with_capacity_zeroed_in(n, alloc), len: n };
}
let mut v = Vec::with_capacity_in(n, alloc);
v.extend_with(n, elem);
v.extend_trusted(core::iter::repeat_n(elem, n));
v
}
}
Expand Down
39 changes: 30 additions & 9 deletions library/core/src/iter/sources/repeat_n.rs
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,34 @@ impl<A> Drop for RepeatN<A> {
}
}

trait SpecRepeatN<A> {
unsafe fn spec_next_unchecked(&mut self) -> A;
}

impl<A: Clone> SpecRepeatN<A> for RepeatN<A> {
default unsafe fn spec_next_unchecked(&mut self) -> A {
self.count -= 1;

if self.count == 0 {
// SAFETY: we just lowered the count to zero so it won't be dropped
// later, and thus it's okay to take it here.
unsafe { ManuallyDrop::take(&mut self.element) }
} else {
A::clone(&self.element)
}
}
}

impl<A: Copy> SpecRepeatN<A> for RepeatN<A> {
unsafe fn spec_next_unchecked(&mut self) -> A {
self.count -= 1;

// For `Copy` types, we can always just read the item directly,
// so skip having a branch that would need to be optimized out.
*self.element
}
}

#[unstable(feature = "iter_repeat_n", issue = "104434")]
impl<A: Clone> Iterator for RepeatN<A> {
type Item = A;
Expand All @@ -120,15 +148,8 @@ impl<A: Clone> Iterator for RepeatN<A> {
return None;
}

self.count -= 1;
Some(if self.count == 0 {
// SAFETY: the check above ensured that the count used to be non-zero,
// so element hasn't been dropped yet, and we just lowered the count to
// zero so it won't be dropped later, and thus it's okay to take it here.
unsafe { ManuallyDrop::take(&mut self.element) }
} else {
A::clone(&self.element)
})
// SAFETY: Just confirmed above that the iterator is non-empty
unsafe { Some(self.spec_next_unchecked()) }
}

#[inline]
Expand Down
13 changes: 13 additions & 0 deletions tests/codegen/vec-resize-memset.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
// compile-flags: -O
// no-system-llvm
// only-64bit
// ignore-debug (the extra assertions get in the way)

#![crate_type = "lib"]

pub fn resize_with_bytes_is_one_memset(x: &mut Vec<u8>) {
let new_len = x.len() + 456789;
x.resize(new_len, 123);
}

// CHECK: call void @llvm.memset.p0.i64({{.+}}, i8 123, i64 456789, i1 false)

0 comments on commit 0db48a7

Please sign in to comment.