Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Small improvement to SipHasher #37312

Merged
merged 1 commit into from
Oct 26, 2016
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
147 changes: 97 additions & 50 deletions src/libcore/hash/sip.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@

use marker::PhantomData;
use ptr;
use cmp;
use mem;

/// An implementation of SipHash 1-3.
///
Expand Down Expand Up @@ -78,45 +80,6 @@ struct State {
v3: u64,
}

// sadly, these macro definitions can't appear later,
// because they're needed in the following defs;
// this design could be improved.

macro_rules! u8to64_le {
($buf:expr, $i:expr) =>
($buf[0+$i] as u64 |
($buf[1+$i] as u64) << 8 |
($buf[2+$i] as u64) << 16 |
($buf[3+$i] as u64) << 24 |
($buf[4+$i] as u64) << 32 |
($buf[5+$i] as u64) << 40 |
($buf[6+$i] as u64) << 48 |
($buf[7+$i] as u64) << 56);
($buf:expr, $i:expr, $len:expr) =>
({
let mut t = 0;
let mut out = 0;
while t < $len {
out |= ($buf[t+$i] as u64) << t*8;
t += 1;
}
out
});
}

/// Load a full u64 word from a byte stream, in LE order. Use
/// `copy_nonoverlapping` to let the compiler generate the most efficient way
/// to load u64 from a possibly unaligned address.
///
/// Unsafe because: unchecked indexing at i..i+8
#[inline]
unsafe fn load_u64_le(buf: &[u8], i: usize) -> u64 {
debug_assert!(i + 8 <= buf.len());
let mut data = 0u64;
ptr::copy_nonoverlapping(buf.get_unchecked(i), &mut data as *mut _ as *mut u8, 8);
data.to_le()
}

macro_rules! compress {
($state:expr) => ({
compress!($state.v0, $state.v1, $state.v2, $state.v3)
Expand All @@ -132,6 +95,47 @@ macro_rules! compress {
});
}

/// Load an integer of the desired type from a byte stream, in LE order. Uses
/// `copy_nonoverlapping` to let the compiler generate the most efficient way
/// to load it from a possibly unaligned address.
///
/// Unsafe because: unchecked indexing at i..i+size_of(int_ty)
macro_rules! load_int_le {
($buf:expr, $i:expr, $int_ty:ident) =>
({
debug_assert!($i + mem::size_of::<$int_ty>() <= $buf.len());
let mut data = 0 as $int_ty;
ptr::copy_nonoverlapping($buf.get_unchecked($i),
&mut data as *mut _ as *mut u8,
mem::size_of::<$int_ty>());
data.to_le()
});
}

/// Load an u64 using up to 7 bytes of a byte slice.
///
/// Unsafe because: unchecked indexing at start..start+len
#[inline]
unsafe fn u8to64_le(buf: &[u8], start: usize, len: usize) -> u64 {
debug_assert!(len < 8);
let mut i = 0; // current byte index (from LSB) in the output u64
let mut out = 0;
if i + 3 < len {
out = load_int_le!(buf, start + i, u32) as u64;
i += 4;
}
if i + 1 < len {
out |= (load_int_le!(buf, start + i, u16) as u64) << (i * 8);
i += 2
}
if i < len {
out |= (*buf.get_unchecked(start + i) as u64) << (i * 8);
i += 1;
}
debug_assert_eq!(i, len);
out
}

impl SipHasher {
/// Creates a new `SipHasher` with the two initial keys set to 0.
#[inline]
Expand Down Expand Up @@ -220,6 +224,37 @@ impl<S: Sip> Hasher<S> {
self.state.v3 = self.k1 ^ 0x7465646279746573;
self.ntail = 0;
}

// Specialized write function that is only valid for buffers with len <= 8.
// It's used to force inlining of write_u8 and write_usize, those would normally be inlined
// except for composite types (that includes slices and str hashing because of delimiter).
// Without this extra push the compiler is very reluctant to inline delimiter writes,
// degrading performance substantially for the most common use cases.
#[inline(always)]
fn short_write(&mut self, msg: &[u8]) {
debug_assert!(msg.len() <= 8);
let length = msg.len();
self.length += length;

let needed = 8 - self.ntail;
let fill = cmp::min(length, needed);
if fill == 8 {
self.tail = unsafe { load_int_le!(msg, 0, u64) };
} else {
self.tail |= unsafe { u8to64_le(msg, 0, fill) } << (8 * self.ntail);
if length < needed {
self.ntail += length;
return;
}
}
self.state.v3 ^= self.tail;
S::c_rounds(&mut self.state);
self.state.v0 ^= self.tail;

// Buffered tail is now flushed, process new input.
self.ntail = length - needed;
self.tail = unsafe { u8to64_le(msg, needed, self.ntail) };
}
}

#[stable(feature = "rust1", since = "1.0.0")]
Expand Down Expand Up @@ -262,6 +297,21 @@ impl super::Hasher for SipHasher24 {
}

impl<S: Sip> super::Hasher for Hasher<S> {
// see short_write comment for explanation
#[inline]
fn write_usize(&mut self, i: usize) {
let bytes = unsafe {
::slice::from_raw_parts(&i as *const usize as *const u8, mem::size_of::<usize>())
};
self.short_write(bytes);
}

// see short_write comment for explanation
#[inline]
fn write_u8(&mut self, i: u8) {
self.short_write(&[i]);
}

#[inline]
fn write(&mut self, msg: &[u8]) {
let length = msg.len();
Expand All @@ -271,19 +321,16 @@ impl<S: Sip> super::Hasher for Hasher<S> {

if self.ntail != 0 {
needed = 8 - self.ntail;
self.tail |= unsafe { u8to64_le(msg, 0, cmp::min(length, needed)) } << 8 * self.ntail;
if length < needed {
self.tail |= u8to64_le!(msg, 0, length) << 8 * self.ntail;
self.ntail += length;
return
} else {
self.state.v3 ^= self.tail;
S::c_rounds(&mut self.state);
self.state.v0 ^= self.tail;
self.ntail = 0;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This branch previously didn't update self.tail, but now it is. To confirm, that's ok?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's ok, as it's marking it has 0 valid bytes at the end.

}

let m = self.tail | u8to64_le!(msg, 0, needed) << 8 * self.ntail;

self.state.v3 ^= m;
S::c_rounds(&mut self.state);
self.state.v0 ^= m;

self.ntail = 0;
}

// Buffered tail is now flushed, process new input.
Expand All @@ -292,7 +339,7 @@ impl<S: Sip> super::Hasher for Hasher<S> {

let mut i = needed;
while i < len - left {
let mi = unsafe { load_u64_le(msg, i) };
let mi = unsafe { load_int_le!(msg, i, u64) };

self.state.v3 ^= mi;
S::c_rounds(&mut self.state);
Expand All @@ -301,7 +348,7 @@ impl<S: Sip> super::Hasher for Hasher<S> {
i += 8;
}

self.tail = u8to64_le!(msg, i, left);
self.tail = unsafe { u8to64_le(msg, i, left) };
self.ntail = left;
}

Expand Down
21 changes: 21 additions & 0 deletions src/libcoretest/hash/sip.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ use test::{Bencher, black_box};

use core::hash::{Hash, Hasher};
use core::hash::{SipHasher, SipHasher13, SipHasher24};
use core::{slice, mem};

// Hash just the bytes of the slice, without length prefix
struct Bytes<'a>(&'a [u8]);
Expand Down Expand Up @@ -327,6 +328,26 @@ fn test_hash_no_concat_alias() {
assert!(hash(&v) != hash(&w));
}

#[test]
fn test_write_short_works() {
let test_usize = 0xd0c0b0a0usize;
let mut h1 = SipHasher24::new();
h1.write_usize(test_usize);
h1.write(b"bytes");
h1.write(b"string");
h1.write_u8(0xFFu8);
h1.write_u8(0x01u8);
let mut h2 = SipHasher24::new();
h2.write(unsafe {
slice::from_raw_parts(&test_usize as *const _ as *const u8,
mem::size_of::<usize>())
});
h2.write(b"bytes");
h2.write(b"string");
h2.write(&[0xFFu8, 0x01u8]);
assert_eq!(h1.finish(), h2.finish());
}

#[bench]
fn bench_str_under_8_bytes(b: &mut Bencher) {
let s = "foo";
Expand Down