Skip to content

Commit

Permalink
Improve Shift Ops (#83)
Browse files Browse the repository at this point in the history
  • Loading branch information
cyberjunk committed May 15, 2024
1 parent ef15ed1 commit d3f9923
Show file tree
Hide file tree
Showing 3 changed files with 56 additions and 24 deletions.
2 changes: 2 additions & 0 deletions include/CppCore.Interface.C/cppcore.h
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@

// macro for large integer arithmetic function declarations
#define CPPCORE_UINT_DECLARATION(name) \
CPPCORE_EXPORT void name ## _shl (void* a, unsigned int b, void* r); \
CPPCORE_EXPORT void name ## _shr (void* a, unsigned int b, void* r); \
CPPCORE_EXPORT void name ## _add (void* a, void* b, void* r); \
CPPCORE_EXPORT void name ## _sub (void* a, void* b, void* r); \
CPPCORE_EXPORT void name ## _mul (void* a, void* b, void* r); \
Expand Down
76 changes: 52 additions & 24 deletions include/CppCore/BitOps.h
Original file line number Diff line number Diff line change
Expand Up @@ -1803,8 +1803,9 @@ namespace CppCore
static INLINE uint32_t shld32(uint32_t a, const uint32_t b, const uint8_t n)
{
#if defined(CPPCORE_CPU_X86ORX64) && defined(CPPCORE_COMPILER_CLANG)
__asm("SHLDL %[n], %[b], %[a]" : "=Rm"(a) : [n]"Ic"(n), [b]"r"(b), [a]"0"(a));
return a;
uint32_t r;
__asm("SHLDL %[n], %[b], %[a]" : "=Rm"(r) : [n]"Ic"(n), [b]"r"(b), [a]"0"(a));
return r;
#else
return (a << n) | (b >> (uint8_t)(32U-n));
#endif
Expand All @@ -1817,8 +1818,9 @@ namespace CppCore
static INLINE uint64_t shld64(uint64_t a, const uint64_t b, const uint8_t n)
{
#if defined(CPPCORE_CPU_X64) && defined(CPPCORE_COMPILER_CLANG)
__asm("SHLDQ %[n], %[b], %[a]" : "=Rm"(a) : [n]"Ic"(n), [b]"r"(b), [a]"0"(a));
return a;
uint64_t r;
__asm("SHLDQ %[n], %[b], %[a]" : "=Rm"(r) : [n]"Ic"(n), [b]"r"(b), [a]"0"(a));
return r;
#elif defined(CPPCORE_CPU_64BIT) && defined(CPPCORE_COMPILER_MSVC)
return __shiftleft128(b, a, n);
#else
Expand Down Expand Up @@ -1867,8 +1869,9 @@ namespace CppCore
static INLINE uint32_t shrd32(uint32_t a, const uint32_t b, const uint8_t n)
{
#if defined(CPPCORE_CPU_X86ORX64) && defined(CPPCORE_COMPILER_CLANG)
__asm("SHRDL %[n], %[b], %[a]" : "=Rm"(a) : [n]"Ic"(n), [b]"r"(b), [a]"0"(a));
return a;
uint32_t r;
__asm("SHRDL %[n], %[b], %[a]" : "=Rm"(r) : [n]"Ic"(n), [b]"r"(b), [a]"0"(a));
return r;
#else
return (a >> n) | (b << (uint8_t)(32U-n));
#endif
Expand All @@ -1881,8 +1884,9 @@ namespace CppCore
static INLINE uint64_t shrd64(uint64_t a, const uint64_t b, const uint8_t n)
{
#if defined(CPPCORE_CPU_X64) && defined(CPPCORE_COMPILER_CLANG)
__asm("SHRDQ %[n], %[b], %[a]" : "=Rm"(a) : [n]"Ic"(n), [b]"r"(b), [a]"0"(a));
return a;
uint64_t r;
__asm("SHRDQ %[n], %[b], %[a]" : "=Rm"(r) : [n]"Ic"(n), [b]"r"(b), [a]"0"(a));
return r;
#elif defined(CPPCORE_CPU_64BIT) && defined(CPPCORE_COMPILER_MSVC)
return __shiftright128(a, b, n);
#else
Expand Down Expand Up @@ -1960,24 +1964,32 @@ namespace CppCore
/// Left shifts a into r (both n32 * 32 bit chunks) by s bits.
/// Uses shld32; s must be greater than 0 and less than 32.
/// </summary>
static INLINE void shl32x(const uint32_t* a, uint32_t* r, const uint32_t n32, const size_t s)
{
   // s == 0 and s >= 32 are rejected: the portable shld32 path shifts the
   // lower chunk right by (32 - s), which must stay within 1..31.
   assert(a && r && n32 && s > 0 && s < 32);

   // Walk from the most significant chunk downwards. The chunk loaded as the
   // "low" operand of one step is reused as the "high" operand of the next,
   // so every a[i] is loaded exactly once and always before r[i] is stored.
   uint32_t hi = a[n32-1U];
   for (uint32_t i = n32-1U; i != 0U; i--)
   {
      const uint32_t lo = a[i-1U];
      // shld32 takes an 8-bit count; s < 32 is asserted above, so the cast is lossless.
      r[i] = CppCore::shld32(hi, lo, static_cast<uint8_t>(s));
      hi = lo;
   }

   // The least significant chunk has no lower neighbour: zeros are shifted in.
   r[0] = hi << s;
}

/// <summary>
/// Left shifts a into r (both n64 * 64 bit chunks) by s bits.
/// Uses shld64; s must be greater than 0 and less than 64.
/// </summary>
static INLINE void shl64x(const uint64_t* a, uint64_t* r, const uint32_t n64, const size_t s)
{
   assert(a && r && n64 && s > 0 && s < 64);

   // Walk from the most significant chunk downwards. The chunk loaded as the
   // "low" operand of one step is reused as the "high" operand of the next,
   // so every a[i] is loaded exactly once and always before r[i] is stored.
   uint64_t hi = a[n64-1U];
   for (uint32_t i = n64-1U; i != 0U; i--)
   {
      const uint64_t lo = a[i-1U];
      // shld64 takes an 8-bit count; s < 64 is asserted above, so the cast is lossless.
      r[i] = CppCore::shld64(hi, lo, static_cast<uint8_t>(s));
      hi = lo;
   }

   // The least significant chunk has no lower neighbour: zeros are shifted in.
   r[0] = hi << s;
}

#if defined(CPPCORE_CPUFEAT_SSE2)
Expand Down Expand Up @@ -2069,9 +2081,13 @@ namespace CppCore
static INLINE void shr32x(const uint32_t* a, uint32_t* r, const uint32_t n32, const uint8_t s)
{
   // s == 0 and s >= 32 are rejected: the portable shrd32 path shifts the
   // upper chunk left by (32 - s), which must stay within 1..31.
   assert(a && r && n32 && s > 0 && s < 32);

   // Walk from the least significant chunk upwards. The chunk loaded as the
   // "high" operand of one step is reused as the "low" operand of the next,
   // so every a[i] is loaded exactly once and always before r[i] is stored.
   uint32_t lo = a[0];
   for (uint32_t i = 0; i < n32-1U; i++)
   {
      const uint32_t hi = a[i+1U];
      r[i] = CppCore::shrd32(lo, hi, s);
      lo = hi;
   }

   // The most significant chunk has no upper neighbour: zeros are shifted in.
   r[n32-1U] = lo >> s;
}

/// <summary>
Expand All @@ -2081,9 +2097,13 @@ namespace CppCore
static INLINE void shr64x(const uint64_t* a, uint64_t* r, const uint32_t n64, const uint8_t s)
{
   assert(a && r && n64 && s > 0 && s < 64);

   // Walk from the least significant chunk upwards. The chunk loaded as the
   // "high" operand of one step is reused as the "low" operand of the next,
   // so every a[i] is loaded exactly once and always before r[i] is stored.
   uint64_t lo = a[0];
   for (uint32_t i = 0; i < n64-1U; i++)
   {
      const uint64_t hi = a[i+1U];
      r[i] = CppCore::shrd64(lo, hi, s);
      lo = hi;
   }

   // The most significant chunk has no upper neighbour: zeros are shifted in.
   r[n64-1U] = lo >> s;
}

#if defined(CPPCORE_CPUFEAT_SSE2)
Expand Down Expand Up @@ -2143,10 +2163,14 @@ namespace CppCore
static INLINE void shl(UINT& r, const UINT& a,const size_t b)
{
static_assert(sizeof(UINT) % 4 == 0);
if (b >= (sizeof(UINT) << 3)) {
if (b >= (sizeof(UINT) << 3)) CPPCORE_UNLIKELY {
CppCore::clear(r);
return;
}
if (b == 0U) CPPCORE_UNLIKELY {
CppCore::clone(r, a);
return;
}
#if defined(CPPCORE_CPU_64BIT)
if constexpr (sizeof(UINT) % 8 == 0)
{
Expand Down Expand Up @@ -2192,10 +2216,14 @@ namespace CppCore
static INLINE void shr(UINT& r, const UINT& a,const size_t b)
{
static_assert(sizeof(UINT) % 4 == 0);
if (b >= (sizeof(UINT) << 3)) {
if (b >= (sizeof(UINT) << 3)) CPPCORE_UNLIKELY {
CppCore::clear(r);
return;
}
if (b == 0U) CPPCORE_UNLIKELY {
CppCore::clone(r, a);
return;
}
#if defined(CPPCORE_CPU_64BIT)
if constexpr (sizeof(UINT) % 8 == 0)
{
Expand Down
2 changes: 2 additions & 0 deletions src/CppCore.Interface.C/cppcore.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,8 @@ BOOL WINAPI DllMain(
#include <CppCore/Block.h>

#define CPPCORE_UINT_IMPLEMENTATION(name, cname) \
void name ## _shl (void* a, unsigned int b, void* r) { CppCore::shl(*(cname*)r, *(cname*)a, b);} \
void name ## _shr (void* a, unsigned int b, void* r) { CppCore::shr(*(cname*)r, *(cname*)a, b);} \
void name ## _add (void* a, void* b, void* r) { CppCore::uadd(*(cname*)a, *(cname*)b, *(cname*)r);} \
void name ## _sub (void* a, void* b, void* r) { CppCore::usub(*(cname*)a, *(cname*)b, *(cname*)r);} \
void name ## _mul (void* a, void* b, void* r) { CppCore::umul(*(cname*)a, *(cname*)b, *(cname*)r);} \
Expand Down

0 comments on commit d3f9923

Please sign in to comment.