From 975db03a3146198f7777d33779d93d231e2a2667 Mon Sep 17 00:00:00 2001
From: Jimmy Lu
Date: Mon, 7 Feb 2022 14:05:46 -0800
Subject: [PATCH] Add sha256 Presto function

Differential Revision: D34052892

fbshipit-source-id: c857326f24c8a50e9f4afcf3c051014d7607fd0b
---
 velox/docs/functions/binary.rst               |  4 ++
 velox/functions/lib/string/StringImpl.h       | 11 ++++++
 velox/functions/prestosql/StringFunctions.h   | 11 ++++++
 .../StringFunctionsRegistration.cpp           |  1 +
 .../prestosql/tests/StringFunctionsTest.cpp   | 37 +++++++++++++++----
 5 files changed, 57 insertions(+), 7 deletions(-)

diff --git a/velox/docs/functions/binary.rst b/velox/docs/functions/binary.rst
index dbe44737d7b3..72b54de06ca1 100644
--- a/velox/docs/functions/binary.rst
+++ b/velox/docs/functions/binary.rst
@@ -10,6 +10,10 @@ Binary Functions
 
     Computes the md5 hash of ``binary``.
 
+.. function:: sha256(binary) -> varbinary
+
+    Computes the SHA-256 hash of ``binary``.
+
 .. function:: to_base64(binary) -> varchar
 
     Encodes ``binary`` into a base64 string representation.
diff --git a/velox/functions/lib/string/StringImpl.h b/velox/functions/lib/string/StringImpl.h
index f187173d51c7..c07af6958ddb 100644
--- a/velox/functions/lib/string/StringImpl.h
+++ b/velox/functions/lib/string/StringImpl.h
@@ -27,6 +27,7 @@
 #include
 #include "folly/CPortability.h"
 #include "folly/Likely.h"
+#include "folly/ssl/OpenSSLHash.h"
 #include "velox/common/base/Exceptions.h"
 #include "velox/common/encode/Base64.h"
 #include "velox/external/md5/md5.h"
@@ -275,6 +276,16 @@ FOLLY_ALWAYS_INLINE bool md5_radix(
   return true;
 }
 
+/// Writes the 32-byte SHA-256 digest of `input` into `output`.
+template <typename TOutString, typename TInString>
+FOLLY_ALWAYS_INLINE bool sha256(TOutString& output, const TInString& input) {
+  output.resize(32);
+  folly::ssl::OpenSSLHash::sha256(
+      folly::MutableByteRange((uint8_t*)output.data(), output.size()),
+      folly::ByteRange((const uint8_t*)input.data(), input.size()));
+  return true;
+}
+
 template <typename TOutString, typename TInString>
 FOLLY_ALWAYS_INLINE bool toHex(TOutString& output, const TInString& input) {
   static const char* const kHexTable =
diff --git a/velox/functions/prestosql/StringFunctions.h b/velox/functions/prestosql/StringFunctions.h
index a2efa9e18b40..45321ee3491b 100644
--- a/velox/functions/prestosql/StringFunctions.h
+++ b/velox/functions/prestosql/StringFunctions.h
@@ -82,6 +82,17 @@ struct Md5Function {
   }
 };
 
+/// sha256(varbinary) -> varbinary. Forwards to stringImpl::sha256.
+template <typename T>
+struct Sha256Function {
+  VELOX_DEFINE_FUNCTION_TYPES(T);
+
+  template <typename TTo, typename TFrom>
+  FOLLY_ALWAYS_INLINE bool call(TTo& result, const TFrom& input) {
+    return stringImpl::sha256(result, input);
+  }
+};
+
 template <typename T>
 struct ToHexFunction {
   VELOX_DEFINE_FUNCTION_TYPES(T);
diff --git a/velox/functions/prestosql/registration/StringFunctionsRegistration.cpp b/velox/functions/prestosql/registration/StringFunctionsRegistration.cpp
index 1aa6635b3b9b..e4b3e3288d5e 100644
--- a/velox/functions/prestosql/registration/StringFunctionsRegistration.cpp
+++ b/velox/functions/prestosql/registration/StringFunctionsRegistration.cpp
@@ -57,6 +57,7 @@ void registerSimpleFunctions() {
   // Register hash functions.
   registerFunction({"xxhash64"});
   registerFunction({"md5"});
+  registerFunction<Sha256Function, Varbinary, Varbinary>({"sha256"});
 
   registerFunction({"to_hex"});
   registerFunction({"from_hex"});
diff --git a/velox/functions/prestosql/tests/StringFunctionsTest.cpp b/velox/functions/prestosql/tests/StringFunctionsTest.cpp
index b54c7110917f..d0dc62f1c435 100644
--- a/velox/functions/prestosql/tests/StringFunctionsTest.cpp
+++ b/velox/functions/prestosql/tests/StringFunctionsTest.cpp
@@ -68,14 +68,15 @@ int expectedLength(int i) {
 }
 
 std::string hexToDec(const std::string& str) {
-  char output[16];
-  auto chars = str.data();
-  for (int i = 0; i < 16; i++) {
-    int high = facebook::velox::functions::stringImpl::fromHex(chars[2 * i]);
-    int low = facebook::velox::functions::stringImpl::fromHex(chars[2 * i + 1]);
-    output[i] = (high << 4) | (low & 0xf);
+  VELOX_CHECK_EQ(str.size() % 2, 0);
+  std::string out;
+  out.resize(str.size() / 2);
+  for (size_t i = 0; i < out.size(); ++i) {
+    int high = facebook::velox::functions::stringImpl::fromHex(str[2 * i]);
+    int low = facebook::velox::functions::stringImpl::fromHex(str[2 * i + 1]);
+    out[i] = (high << 4) | (low & 0xf);
   }
-  return std::string(output, 16);
+  return out;
 }
 
 } // namespace
@@ -1029,6 +1030,28 @@ TEST_F(StringFunctionsTest, md5) {
   EXPECT_EQ(std::nullopt, md5(std::nullopt));
 }
 
+TEST_F(StringFunctionsTest, sha256) {
+  const auto sha256 = [&](std::optional<std::string> arg) {
+    return evaluateOnce<std::string, std::string>(
+        "sha256(c0)", {arg}, {VARBINARY()});
+  };
+
+  EXPECT_EQ(
+      hexToDec(
+          "02208b9403a87df9f4ed6b2ee2657efaa589026b4cce9accc8e8a5bf3d693c86"),
+      sha256("hashme"));
+  EXPECT_EQ(
+      hexToDec(
+          "d0067cad9a63e0813759a2bb841051ca73570c0da2e08e840a8eb45db6a7a010"),
+      sha256("Infinity"));
+  EXPECT_EQ(
+      hexToDec(
+          "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855"),
+      sha256(""));
+
+  EXPECT_EQ(std::nullopt, sha256(std::nullopt));
+}
+
 void StringFunctionsTest::testReplaceInPlace(
     const std::vector>& tests,
     const std::string& search,