Skip to content

Commit

Permalink
std: Add retain method for HashMap and HashSet
Browse files Browse the repository at this point in the history
Fix #36648
  • Loading branch information
F001 committed Feb 8, 2017
1 parent ea7a648 commit d90a7b3
Show file tree
Hide file tree
Showing 3 changed files with 182 additions and 46 deletions.
114 changes: 75 additions & 39 deletions src/libstd/collections/hash/map.rs
Original file line number Diff line number Diff line change
Expand Up @@ -416,22 +416,26 @@ fn search_hashed<K, V, M, F>(table: M, hash: SafeHash, mut is_match: F) -> Inter
}
}

fn pop_internal<K, V>(starting_bucket: FullBucketMut<K, V>) -> (K, V) {
fn pop_internal<K, V>(starting_bucket: FullBucketMut<K, V>)
-> (K, V, &mut RawTable<K, V>)
{
let (empty, retkey, retval) = starting_bucket.take();
let mut gap = match empty.gap_peek() {
Some(b) => b,
None => return (retkey, retval),
Ok(b) => b,
Err(b) => return (retkey, retval, b.into_table()),
};

while gap.full().displacement() != 0 {
gap = match gap.shift() {
Some(b) => b,
None => break,
Ok(b) => b,
Err(b) => {
return (retkey, retval, b.into_table());
},
};
}

// Now we've done all our shifting. Return the value we grabbed earlier.
(retkey, retval)
(retkey, retval, gap.into_bucket().into_table())
}

/// Perform robin hood bucket stealing at the given `bucket`. You must
Expand Down Expand Up @@ -721,38 +725,7 @@ impl<K, V, S> HashMap<K, V, S>
return;
}

// Grow the table.
// Specialization of the other branch.
let mut bucket = Bucket::first(&mut old_table);

// "So a few of the first shall be last: for many be called,
// but few chosen."
//
// We'll most likely encounter a few buckets at the beginning that
// have their initial buckets near the end of the table. They were
// placed at the beginning as the probe wrapped around the table
// during insertion. We must skip forward to a bucket that won't
// get reinserted too early and won't unfairly steal others spot.
// This eliminates the need for robin hood.
loop {
bucket = match bucket.peek() {
Full(full) => {
if full.displacement() == 0 {
// This bucket occupies its ideal spot.
// It indicates the start of another "cluster".
bucket = full.into_bucket();
break;
}
// Leaving this bucket in the last cluster for later.
full.into_bucket()
}
Empty(b) => {
// Encountered a hole between clusters.
b.into_bucket()
}
};
bucket.next();
}
let mut bucket = Bucket::head_bucket(&mut old_table);

// This is how the buckets might be laid out in memory:
// ($ marks an initialized bucket)
Expand Down Expand Up @@ -1208,6 +1181,57 @@ impl<K, V, S> HashMap<K, V, S>

self.search_mut(k).into_occupied_bucket().map(|bucket| pop_internal(bucket).1)
}

/// Retains only the elements specified by the predicate.
///
/// In other words, remove all pairs `(k, v)` such that `f(&k,&mut v)` returns `false`.
///
/// # Examples
///
/// ```
/// #![feature(retain_hash_collection)]
/// use std::collections::HashMap;
///
/// let mut map: HashMap<isize, isize> = (0..8).map(|x|(x, x*10)).collect();
/// map.retain(|&k, _| k % 2 == 0);
/// assert_eq!(map.len(), 4);
/// ```
#[unstable(feature = "retain_hash_collection", issue = "36648")]
pub fn retain<F>(&mut self, mut f: F)
where F: FnMut(&K, &mut V) -> bool
{
if self.table.capacity() == 0 || self.table.size() == 0 {
return;
}
let mut bucket = Bucket::head_bucket(&mut self.table);
bucket.prev();
let tail = bucket.index();
loop {
bucket = match bucket.peek() {
Full(mut full) => {
let should_remove = {
let (k, v) = full.read_mut();
!f(k, v)
};
if should_remove {
let prev_idx = full.index();
let prev_raw = full.raw();
let (_, _, t) = pop_internal(full);
Bucket::new_from(prev_raw, prev_idx, t)
} else {
full.into_bucket()
}
},
Empty(b) => {
b.into_bucket()
}
};
bucket.prev(); // reverse iteration
if bucket.index() == tail {
break;
}
}
}
}

#[stable(feature = "rust1", since = "1.0.0")]
Expand Down Expand Up @@ -1862,7 +1886,8 @@ impl<'a, K, V> OccupiedEntry<'a, K, V> {
/// ```
#[stable(feature = "map_entry_recover_keys2", since = "1.12.0")]
pub fn remove_entry(self) -> (K, V) {
pop_internal(self.elem)
let (k, v, _) = pop_internal(self.elem);
(k, v)
}

/// Gets a reference to the value in the entry.
Expand Down Expand Up @@ -3156,4 +3181,15 @@ mod test_map {
assert_eq!(a.len(), 1);
assert_eq!(a[key], value);
}

#[test]
fn test_retain() {
let mut map: HashMap<isize, isize> = (0..100).map(|x|(x, x*10)).collect();

map.retain(|&k, _| k % 2 == 0);
assert_eq!(map.len(), 50);
assert_eq!(map[&2], 20);
assert_eq!(map[&4], 40);
assert_eq!(map[&6], 60);
}
}
33 changes: 33 additions & 0 deletions src/libstd/collections/hash/set.rs
Original file line number Diff line number Diff line change
Expand Up @@ -624,6 +624,28 @@ impl<T, S> HashSet<T, S>
{
Recover::take(&mut self.map, value)
}

/// Retains only the elements specified by the predicate.
///
/// In other words, remove all elements `e` such that `f(&e)` returns `false`.
///
/// # Examples
///
/// ```
/// #![feature(retain_hash_collection)]
/// use std::collections::HashSet;
///
/// let xs = [1,2,3,4,5,6];
/// let mut set: HashSet<isize> = xs.iter().cloned().collect();
/// set.retain(|&k| k % 2 == 0);
/// assert_eq!(set.len(), 3);
/// ```
#[unstable(feature = "retain_hash_collection", issue = "36648")]
pub fn retain<F>(&mut self, mut f: F)
where F: FnMut(&T) -> bool
{
self.map.retain(|k, _| f(k));
}
}

#[stable(feature = "rust1", since = "1.0.0")]
Expand Down Expand Up @@ -1605,4 +1627,15 @@ mod test_set {
assert!(a.contains(&5));
assert!(a.contains(&6));
}

#[test]
fn test_retain() {
let xs = [1,2,3,4,5,6];
let mut set: HashSet<isize> = xs.iter().cloned().collect();
set.retain(|&k| k % 2 == 0);
assert_eq!(set.len(), 3);
assert!(set.contains(&2));
assert!(set.contains(&4));
assert!(set.contains(&6));
}
}
81 changes: 74 additions & 7 deletions src/libstd/collections/hash/table.rs
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,7 @@ pub struct RawTable<K, V> {
unsafe impl<K: Send, V: Send> Send for RawTable<K, V> {}
unsafe impl<K: Sync, V: Sync> Sync for RawTable<K, V> {}

struct RawBucket<K, V> {
pub struct RawBucket<K, V> {
hash: *mut HashUint,
// We use *const to ensure covariance with respect to K and V
pair: *const (K, V),
Expand Down Expand Up @@ -216,6 +216,10 @@ impl<K, V, M> FullBucket<K, V, M> {
pub fn index(&self) -> usize {
self.idx
}
/// Get the raw bucket.
pub fn raw(&self) -> RawBucket<K, V> {
self.raw
}
}

impl<K, V, M> EmptyBucket<K, V, M> {
Expand All @@ -230,6 +234,10 @@ impl<K, V, M> Bucket<K, V, M> {
pub fn index(&self) -> usize {
self.idx
}
/// get the table.
pub fn into_table(self) -> M {
self.table
}
}

impl<K, V, M> Deref for FullBucket<K, V, M>
Expand Down Expand Up @@ -275,6 +283,16 @@ impl<K, V, M: Deref<Target = RawTable<K, V>>> Bucket<K, V, M> {
Bucket::at_index(table, hash.inspect() as usize)
}

pub fn new_from(r: RawBucket<K, V>, i: usize, t: M)
-> Bucket<K, V, M>
{
Bucket {
raw: r,
idx: i,
table: t,
}
}

pub fn at_index(table: M, ib_index: usize) -> Bucket<K, V, M> {
// if capacity is 0, then the RawBucket will be populated with bogus pointers.
// This is an uncommon case though, so avoid it in release builds.
Expand All @@ -296,6 +314,40 @@ impl<K, V, M: Deref<Target = RawTable<K, V>>> Bucket<K, V, M> {
}
}

// "So a few of the first shall be last: for many be called,
// but few chosen."
//
// We'll most likely encounter a few buckets at the beginning that
// have their initial buckets near the end of the table. They were
// placed at the beginning as the probe wrapped around the table
// during insertion. We must skip forward to a bucket that won't
// get reinserted too early and won't unfairly steal others spot.
// This eliminates the need for robin hood.
pub fn head_bucket(table: M) -> Bucket<K, V, M> {
let mut bucket = Bucket::first(table);

loop {
bucket = match bucket.peek() {
Full(full) => {
if full.displacement() == 0 {
// This bucket occupies its ideal spot.
// It indicates the start of another "cluster".
bucket = full.into_bucket();
break;
}
// Leaving this bucket in the last cluster for later.
full.into_bucket()
}
Empty(b) => {
// Encountered a hole between clusters.
b.into_bucket()
}
};
bucket.next();
}
bucket
}

/// Reads a bucket at a given index, returning an enum indicating whether
/// it's initialized or not. You need to match on this enum to get
/// the appropriate types to call most of the other functions in
Expand Down Expand Up @@ -333,6 +385,17 @@ impl<K, V, M: Deref<Target = RawTable<K, V>>> Bucket<K, V, M> {
self.raw = self.raw.offset(dist);
}
}

/// Modifies the bucket pointer in place to make it point to the previous slot.
pub fn prev(&mut self) {
let range = self.table.capacity();
let new_idx = self.idx.wrapping_sub(1) & (range - 1);
let dist = (new_idx as isize).wrapping_sub(self.idx as isize);
self.idx = new_idx;
unsafe {
self.raw = self.raw.offset(dist);
}
}
}

impl<K, V, M: Deref<Target = RawTable<K, V>>> EmptyBucket<K, V, M> {
Expand All @@ -352,7 +415,7 @@ impl<K, V, M: Deref<Target = RawTable<K, V>>> EmptyBucket<K, V, M> {
}
}

pub fn gap_peek(self) -> Option<GapThenFull<K, V, M>> {
pub fn gap_peek(self) -> Result<GapThenFull<K, V, M>, Bucket<K, V, M>> {
let gap = EmptyBucket {
raw: self.raw,
idx: self.idx,
Expand All @@ -361,12 +424,12 @@ impl<K, V, M: Deref<Target = RawTable<K, V>>> EmptyBucket<K, V, M> {

match self.next().peek() {
Full(bucket) => {
Some(GapThenFull {
Ok(GapThenFull {
gap: gap,
full: bucket,
})
}
Empty(..) => None,
Empty(e) => Err(e.into_bucket()),
}
}
}
Expand Down Expand Up @@ -529,7 +592,11 @@ impl<K, V, M> GapThenFull<K, V, M>
&self.full
}

pub fn shift(mut self) -> Option<GapThenFull<K, V, M>> {
pub fn into_bucket(self) -> Bucket<K, V, M> {
self.full.into_bucket()
}

pub fn shift(mut self) -> Result<GapThenFull<K, V, M>, Bucket<K, V, M>> {
unsafe {
*self.gap.raw.hash = mem::replace(&mut *self.full.raw.hash, EMPTY_BUCKET);
ptr::copy_nonoverlapping(self.full.raw.pair, self.gap.raw.pair as *mut (K, V), 1);
Expand All @@ -544,9 +611,9 @@ impl<K, V, M> GapThenFull<K, V, M>

self.full = bucket;

Some(self)
Ok(self)
}
Empty(..) => None,
Empty(b) => Err(b.into_bucket()),
}
}
}
Expand Down

0 comments on commit d90a7b3

Please sign in to comment.