diff --git a/src/robin_dict.jl b/src/robin_dict.jl index 529a97cf..e4c49ed5 100644 --- a/src/robin_dict.jl +++ b/src/robin_dict.jl @@ -1,5 +1,7 @@ # the load factor after which the dictionary `rehash` happens const ROBIN_DICT_LOAD_FACTOR = 0.70 +const DIBS_BYTES = 8 +const DIBS_MASK = 0x0000_00FF """ RobinDict([itr]) @@ -27,7 +29,7 @@ RobinDict{String, Int64} with 2 entries: ``` """ mutable struct RobinDict{K,V} <: AbstractDict{K,V} - hashes::Vector{UInt32} + meta::Vector{UInt32} keys::Array{K,1} vals::Array{V,1} count::Int @@ -40,7 +42,7 @@ function RobinDict{K, V}() where {K, V} end function RobinDict{K, V}(d::RobinDict{K, V}) where {K, V} - RobinDict{K, V}(copy(d.hashes), copy(d.keys), copy(d.vals), d.count, d.idxfloor) + RobinDict{K, V}(copy(d.meta), copy(d.keys), copy(d.vals), d.count, d.idxfloor) end function RobinDict{K,V}(kv) where V where K @@ -83,28 +85,32 @@ function RobinDict(kv) end end -hash_key(key) = (hash(key)%UInt32) | 0x80000000 +Base.@propagate_inbounds isslotfilled(h::RobinDict, index) = !iszero(h.meta[index]) +Base.@propagate_inbounds isslotempty(h::RobinDict, index) = iszero(h.meta[index]) + +hash_key(key) = (hash(key)%UInt32) desired_index(hash, sz) = (hash & (sz - 1)) + 1 -function calculate_distance(h::RobinDict{K, V}, index) where {K, V} - @assert isslotfilled(h, index) - sz = length(h.keys) - @inbounds index_init = desired_index(h.hashes[index], sz) - return (index - index_init + sz) & (sz - 1) -end +function make_meta(hash::UInt32, dibs::Int) + meta = hash + meta = (meta << DIBS_BYTES) | (dibs % UInt32) + return meta +end + +hash_meta(meta::UInt32) = (meta>>DIBS_BYTES) +dibs_meta(meta::UInt32) = Int(meta & DIBS_MASK) + +Base.@propagate_inbounds calculate_distance(h::RobinDict, index) = dibs_meta(h.meta[index]) # insert algorithm -function rh_insert!(h::RobinDict{K, V}, key::K, val::V) where {K, V} +function rh_insert!(h::RobinDict{K, V}, key::K, val::V, hash = hash_key(key)) where {K, V} sz = length(h.keys) (h.count > ROBIN_DICT_LOAD_FACTOR * sz) && rehash!(h, sz<<2) - - # table full - @assert h.count != length(h.keys) - - ckey, cval, chash = key, val, hash_key(key) + + ckey, cval, chash = key, val, hash + cmeta = make_meta(chash, 0) sz = length(h.keys) - index_init = desired_index(chash, sz) - + index_init = desired_index(hash_meta(cmeta), sz) index_curr = index_init probe_distance = 0 probe_current = 0 @@ -117,13 +123,14 @@ function rh_insert!(h::RobinDict{K, V}, key::K, val::V) where {K, V} if probe_current > probe_distance h.vals[index_curr], cval = cval, h.vals[index_curr] h.keys[index_curr], ckey = ckey, h.keys[index_curr] - h.hashes[index_curr], chash = chash, h.hashes[index_curr] + cmeta = hash_meta(cmeta) + h.meta[index_curr], cmeta = make_meta(cmeta, probe_current), h.meta[index_curr] probe_current = probe_distance end probe_current += 1 index_curr = (index_curr & (sz - 1)) + 1 end - + @inbounds if isslotfilled(h, index_curr) && isequal(h.keys[index_curr], ckey) h.vals[index_curr] = cval return index_curr @@ -135,10 +142,9 @@ function rh_insert!(h::RobinDict{K, V}, key::K, val::V) where {K, V} @inbounds h.vals[index_curr] = cval @inbounds h.keys[index_curr] = ckey - @inbounds h.hashes[index_curr] = chash - - @assert probe_current >= 0 - + cmeta = hash_meta(cmeta) + @inbounds h.meta[index_curr] = make_meta(cmeta, probe_current) + if h.idxfloor == 0 h.idxfloor = index_curr else @@ -147,63 +153,18 @@ function rh_insert!(h::RobinDict{K, V}, key::K, val::V) where {K, V} return index_curr end -function rh_insert_for_rehash!(h_new::RobinDict{K, V}, key::K, val::V, hash::UInt32) where {K, V} - # table full - @assert h_new.count != length(h_new.keys) - - ckey, cval, chash = key, val, hash - sz = length(h_new.keys) - index_init = desired_index(chash, sz) - - index_curr = index_init - probe_distance = 0 - probe_current = 0 - @inbounds while true - if (isslotempty(h_new, index_curr)) - break - end - probe_distance = calculate_distance(h_new, index_curr) - - if probe_current > probe_distance - h_new.vals[index_curr], cval = cval, h_new.vals[index_curr] - h_new.keys[index_curr], ckey = ckey, h_new.keys[index_curr] - h_new.hashes[index_curr], chash = chash, h_new.hashes[index_curr] - probe_current = probe_distance - end - probe_current += 1 - index_curr = (index_curr & (sz - 1)) + 1 - end - - @inbounds if isslotempty(h_new, index_curr) - h_new.count += 1 - end - - @inbounds h_new.vals[index_curr] = cval - @inbounds h_new.keys[index_curr] = ckey - @inbounds h_new.hashes[index_curr] = chash - - @assert probe_current >= 0 - - if h_new.idxfloor == 0 - h_new.idxfloor = index_curr - else - h_new.idxfloor = min(h_new.idxfloor, index_curr) - end - return index_curr -end - #rehash! algorithm function rehash!(h::RobinDict{K,V}, newsz = length(h.keys)) where {K, V} oldk = h.keys oldv = h.vals - oldh = h.hashes + oldmeta = h.meta sz = length(oldk) newsz = _tablesz(newsz) if h.count == 0 resize!(h.keys, newsz) resize!(h.vals, newsz) - resize!(h.hashes, newsz) - fill!(h.hashes, 0) + resize!(h.meta, newsz) + fill!(h.meta, 0) h.count = 0 h.idxfloor = 0 return h @@ -211,15 +172,15 @@ function rehash!(h::RobinDict{K,V}, newsz = length(h.keys)) where {K, V} h.keys = Vector{K}(undef, newsz) h.vals = Vector{V}(undef, newsz) - h.hashes = zeros(UInt32,newsz) + h.meta = zeros(UInt32, newsz) h.count = 0 h.idxfloor = 0 for i = 1:sz - @inbounds if oldh[i] != 0 + @inbounds if oldmeta[i] != 0 k = oldk[i] v = oldv[i] - rh_insert_for_rehash!(h, k, v, oldh[i]) + rh_insert!(h, k, v, oldmeta[i] >> DIBS_BYTES) end end return h @@ -235,10 +196,6 @@ function Base.sizehint!(d::RobinDict, newsz) rehash!(d, newsz) end -Base.@propagate_inbounds isslotfilled(h::RobinDict, index) = (h.hashes[index] != 0) -Base.@propagate_inbounds isslotempty(h::RobinDict, index) = (h.hashes[index] == 0) - - function Base.setindex!(h::RobinDict{K,V}, v0, key0) where {K, V} key = convert(K, key0) isequal(key, key0) || throw(ArgumentError("$key0 is not a valid key for type $K")) @@ -249,7 +206,7 @@ function _setindex!(h::RobinDict{K,V}, key::K, v0) where {K, V} v = convert(V, v0) index = rh_insert!(h, key, v) @assert index > 0 - return h + h end """ @@ -273,31 +230,38 @@ RobinDict{String, Int64}() """ function Base.empty!(h::RobinDict{K,V}) where {K, V} sz = length(h.keys) - empty!(h.hashes) + empty!(h.meta) empty!(h.keys) empty!(h.vals) resize!(h.keys, sz) resize!(h.vals, sz) - resize!(h.hashes, sz) - fill!(h.hashes, 0) + resize!(h.meta, sz) + fill!(h.meta, 0) h.count = 0 h.idxfloor = 0 return h end -function rh_search(h::RobinDict{K, V}, key) where {K, V} +Base.@propagate_inbounds function rh_search(h::RobinDict{K, V}, key) where {K, V} sz = length(h.keys) chash = hash_key(key) - index = desired_index(chash, sz) - cdibs = 0 + cmeta = make_meta(chash, 0) + chash_meta = hash_meta(cmeta) + index = desired_index(chash_meta, sz) + probe_current = 0 + @inbounds while true - if isslotempty(h, index) - return -1 - elseif cdibs > calculate_distance(h, index) - return -1 - elseif h.hashes[index] == chash && (h.keys[index] === key || isequal(h.keys[index], key)) - return index + isslotempty(h, index) && return -1 + + probe_current > calculate_distance(h, index) && return -1 + + if hash_meta(h.meta[index]) == chash_meta + if (key === h.keys[index] || isequal(h.keys[index], key)) + return index + end end + + probe_current += 1 index = (index & (sz - 1)) + 1 end end @@ -326,7 +290,6 @@ RobinDict{String, Int64} with 4 entries: "d" => 4 ``` """ -Base.get!(collection, key, default) Base.get!(h::RobinDict{K,V}, key0, default) where {K,V} = get!(()->default, h, key0) @@ -344,7 +307,6 @@ get!(dict, key) do end ``` """ -Base.get!(f::Function, collection, key) function Base.get!(default::Callable, h::RobinDict{K,V}, key0::K) where {K, V} key = convert(K, key0) @@ -353,7 +315,7 @@ end function _get!(default::Callable, h::RobinDict{K,V}, key::K) where V where K index = rh_search(h, key) - + index > 0 && return h.vals[index] v = convert(V, default()) @@ -458,37 +420,39 @@ end # backward shift deletion by not keeping any tombstones function rh_delete!(h::RobinDict{K, V}, index) where {K, V} @assert index > 0 - # this assumes that there is a key/value present in the dictionary at index + index0 = index sz = length(h.keys) @inbounds while true index0 = (index0 & (sz - 1)) + 1 - if isslotempty(h, index0) || calculate_distance(h, index0) == 0 + if isslotempty(h, index0) || iszero(dibs_meta(h.meta[index0])) break end end - #index0 represents the position before which we have to shift backwards - + #index0 represents the position before which we have to shift backwards + # the backwards shifting algorithm curr = index next = (index & (sz - 1)) + 1 - + @inbounds while next != index0 h.vals[curr] = h.vals[next] h.keys[curr] = h.keys[next] - h.hashes[curr] = h.hashes[next] + mmeta = h.meta[next] + mdibs = dibs_meta(mmeta) + h.meta[curr] = make_meta(hash_meta(mmeta), mdibs-1) curr = next next = (next & (sz-1)) + 1 end - + #curr is at the last position, reset back to normal - isbitstype(K) || isbitsunion(K) || ccall(:jl_arrayunset, Cvoid, (Any, UInt), h.keys, curr-1) - isbitstype(V) || isbitsunion(V) || ccall(:jl_arrayunset, Cvoid, (Any, UInt), h.vals, curr-1) - @inbounds h.hashes[curr] = 0x0 + isbitstype(K) || isbitsunion(K) || ccall(:jl_arrayunset, Cvoid, (Any, UInt32), h.keys, curr-1) + isbitstype(V) || isbitsunion(V) || ccall(:jl_arrayunset, Cvoid, (Any, UInt32), h.vals, curr-1) + @inbounds h.meta[curr] = zero(UInt32) h.count -= 1 - # this is necessary because key at idxfloor might get deleted + # this is necessary because key at idxfloor might get deleted h.idxfloor = get_next_filled(h, h.idxfloor) return h end @@ -538,7 +502,7 @@ function Base.pop!(h::RobinDict) @inbounds key = h.keys[idx] @inbounds val = h.vals[idx] rh_delete!(h, idx) - return key => val + key => val end """ diff --git a/test/test_robin_dict.jl b/test/test_robin_dict.jl index 909ac34f..1b88e2b0 100644 --- a/test/test_robin_dict.jl +++ b/test/test_robin_dict.jl @@ -5,7 +5,7 @@ @test h1.idxfloor == 0 @test length(h1.keys) == 16 @test length(h1.vals) == 16 - @test length(h1.hashes) == 16 + @test length(h1.meta) == 16 @test eltype(h1) == Pair{Any, Any} @test keytype(h1) == Any @test valtype(h1) == Any @@ -334,11 +334,11 @@ end for i=1:1000 h[i] = i+1 end - length0 = length(h.hashes) + length0 = length(h.meta) empty!(h) @test h.count == 0 @test h.idxfloor == 0 - @test length(h.hashes) == length(h.keys) == length(h.vals) == length0 + @test length(h.meta) == length(h.keys) == length(h.vals) == length0 for i=-1000:1000 @test !haskey(h, i) end @@ -379,16 +379,18 @@ end @testset "invariants" begin # Functions which are not exported, but are required for checking invariants - hash_key(key) = (hash(key)%UInt32) | 0x80000000 + hash_key(key) = (hash(key)%UInt32) desired_index(hash, sz) = (hash & (sz - 1)) + 1 - isslotfilled(h::RobinDict, index) = (h.hashes[index] != 0) + isslotfilled(h::RobinDict, index) = (h.meta[index] != 0) isslotempty(h::RobinDict, index) = (h.hashes[index] == 0) + DIBS_BYTES = 8 + DIBS_MASK = 0x0000_00FF + hash_meta(meta::UInt32) = (meta>>DIBS_BYTES) + dibs_meta(meta::UInt32) = Int(meta & DIBS_MASK) function calculate_distance(h::RobinDict{K, V}, index) where {K, V} @assert isslotfilled(h, index) - sz = length(h.keys) - @inbounds index_init = desired_index(h.hashes[index], sz) - return (index - index_init + sz) & (sz - 1) + return dibs_meta(h.meta[index]) end function get_idxfloor(h::RobinDict) @@ -407,7 +409,7 @@ end for i in 1:length(h1.keys) if isslotfilled(h1, i) - @test hash_key(h1.keys[i]) == h1.hashes[i] + @test hash_meta((hash_key(h1.keys[i])<= des_ind) pos_diff = i - des_ind