Skip to content

Commit

Permalink
refactor(rust): Migrate to hashbrown 0.15 (#19091)
Browse files: browse the repository at this point in the history
  • Loading branch information
orlp authored Oct 3, 2024
1 parent 04d4c5d commit f55658b
Show file tree
Hide file tree
Showing 6 changed files with 102 additions and 103 deletions.
76 changes: 48 additions & 28 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 3 additions & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,9 @@ fallible-streaming-iterator = "0.1.9"
fast-float = { version = "0.2" }
flate2 = { version = "1", default-features = false }
futures = "0.3.25"
hashbrown = { version = "=0.14.5", features = ["rayon", "ahash", "serde"] }
hashbrown = { version = "0.15.0", features = ["rayon", "serde"] }
# https://github.com/rust-lang/hashbrown/issues/564
hashbrown_old_nightly_hack = { package = "hashbrown", version = "0.14.5", features = ["rayon", "serde"] }
hex = "0.4.3"
indexmap = { version = "2", features = ["std", "serde"] }
itoa = "1.0.6"
Expand Down
3 changes: 2 additions & 1 deletion crates/polars-core/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ chrono-tz = { workspace = true, optional = true }
comfy-table = { version = "7.1.1", default-features = false, optional = true }
either = { workspace = true }
hashbrown = { workspace = true }
hashbrown_old_nightly_hack = { workspace = true }
indexmap = { workspace = true }
ndarray = { workspace = true, optional = true }
num-traits = { workspace = true }
Expand All @@ -48,7 +49,7 @@ version_check = { workspace = true }

[features]
simd = ["arrow/simd", "polars-compute/simd"]
nightly = ["simd", "hashbrown/nightly", "polars-utils/nightly", "arrow/nightly"]
nightly = ["simd", "hashbrown/nightly", "hashbrown_old_nightly_hack/nightly", "polars-utils/nightly", "arrow/nightly"]
avx512 = []
docs = []
temporal = ["regex", "chrono", "polars-error/regex"]
Expand Down
54 changes: 21 additions & 33 deletions crates/polars-core/src/chunked_array/builder/list/categorical.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
use hashbrown::hash_table::Entry;
use hashbrown::HashTable;

use super::*;

pub fn create_categorical_chunked_listbuilder(
Expand Down Expand Up @@ -75,15 +78,12 @@ impl ListBuilderTrait for ListEnumCategoricalChunkedBuilder {

struct ListLocalCategoricalChunkedBuilder {
inner: ListPrimitiveChunkedBuilder<UInt32Type>,
idx_lookup: PlHashMap<KeyWrapper, ()>,
idx_lookup: HashTable<u32>,
ordering: CategoricalOrdering,
categories: MutablePlString,
categories_hash: u128,
}

// Wrap u32 key to avoid incorrect usage of hashmap with custom lookup
struct KeyWrapper(u32);

impl ListLocalCategoricalChunkedBuilder {
#[inline]
pub fn get_hash_builder() -> PlRandomState {
Expand All @@ -104,10 +104,7 @@ impl ListLocalCategoricalChunkedBuilder {
values_capacity,
DataType::UInt32,
),
idx_lookup: PlHashMap::with_capacity_and_hasher(
capacity,
ListLocalCategoricalChunkedBuilder::get_hash_builder(),
),
idx_lookup: HashTable::with_capacity(capacity),
ordering,
categories: MutablePlString::with_capacity(capacity),
categories_hash: hash,
Expand Down Expand Up @@ -141,33 +138,24 @@ impl ListBuilderTrait for ListLocalCategoricalChunkedBuilder {

// Custom hashing / equality functions for comparing the &str to the idx
// SAFETY: the indices stored in the hashmap are within bounds of categories
let r = unsafe {
self.idx_lookup.raw_table_mut().find_or_find_insert_slot(
unsafe {
let r = self.idx_lookup.entry(
hash_cat,
|(k, _)| self.categories.value_unchecked(k.0 as usize) == cat,
|(k, _): &(KeyWrapper, ())| {
hash_builder.hash_one(self.categories.value_unchecked(k.0 as usize))
|k| self.categories.value_unchecked(*k as usize) == cat,
|k| hash_builder.hash_one(self.categories.value_unchecked(*k as usize)),
);

match r {
Entry::Occupied(v) => {
// SAFETY: bucket is initialized.
idx_mapping.insert_unique_unchecked(idx as u32, *v.get());
},
Entry::Vacant(slot) => {
idx_mapping.insert_unique_unchecked(idx as u32, len as u32);
self.categories.push(Some(cat));
slot.insert(len as u32);
},
)
};

match r {
Ok(v) => {
// SAFETY: Bucket is initialized
idx_mapping.insert_unique_unchecked(idx as u32, unsafe { v.as_ref().0 .0 });
},
Err(e) => {
idx_mapping.insert_unique_unchecked(idx as u32, len as u32);
self.categories.push(Some(cat));
// SAFETY: No mutations in hashmap since find_or_find_insert_slot call
unsafe {
self.idx_lookup.raw_table_mut().insert_in_slot(
hash_cat,
e,
(KeyWrapper(len as u32), ()),
)
};
},
}
}
}

Expand Down
Loading

0 comments on commit f55658b

Please sign in to comment.