Skip to content

Commit

Permalink
Experiment in removing ptr->int->ptr casts
Browse files Browse the repository at this point in the history
  • Loading branch information
steven-johnson committed Jun 27, 2023
1 parent c7ca15f commit 6579c66
Show file tree
Hide file tree
Showing 3 changed files with 36 additions and 21 deletions.
44 changes: 32 additions & 12 deletions src/runtime/HalideBuffer.h
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
#include <cstring>
#include <limits>
#include <memory>
#include <type_traits>
#include <vector>

#ifdef __APPLE__
Expand Down Expand Up @@ -710,6 +711,27 @@ class Buffer {
"Can't convert from a Buffer with static dimensionality to a Buffer with different static dimensionality");
}

template<typename SomeType>
inline SomeType align_up(SomeType value, size_t alignment) {
// Doing pointer -> integer -> pointer conversions can force
// the compiler to pessimize optimizations (see e.g.
// https://www.ralfj.de/blog/2020/12/14/provenance.html),
// so, if our compiler has builtins for this, use them;
// if not, fall back to casting as necessary.
#if defined(__has_builtin) && __has_builtin(__builtin_align_up)
return __builtin_align_up(value, alignment);
#else
if constexpr (std::is_pointer<SomeType>::value) {
uintptr_t uvalue = (uintptr_t)(value);
uvalue = (uvalue + alignment - 1) & ~(alignment - 1);
return (SomeType)uvalue;
} else {
static_assert(std::is_integral<SomeType>::value, "align_up only handles pointers and integral types");
return (value + alignment - 1) & ~(alignment - 1);
}
#endif
};

public:
/** Determine if a Buffer<T, Dims, InClassDimStorage> can be constructed from some other Buffer type.
* If this can be determined at compile time, fail with a static assert; otherwise
Expand Down Expand Up @@ -885,10 +907,6 @@ class Buffer {
// is such that the logical size is an integral multiple of 128 bytes (or a bit more).
constexpr size_t alignment = HALIDE_RUNTIME_BUFFER_ALLOCATION_ALIGNMENT;

const auto align_up = [=](size_t value) -> size_t {
return (value + alignment - 1) & ~(alignment - 1);
};

size_t size = size_in_bytes();

#if HALIDE_RUNTIME_BUFFER_USE_ALIGNED_ALLOC
Expand All @@ -899,10 +917,11 @@ class Buffer {
// so that the user storage also starts at an aligned point. This is a bit
// wasteful, but probably not a big deal.
static_assert(sizeof(AllocationHeader) <= alignment);
void *alloc_storage = ::aligned_alloc(alignment, align_up(size) + alignment);
assert((uintptr_t)alloc_storage == align_up((uintptr_t)alloc_storage));
size_t aligned_size = align_up(size, alignment);
uint8_t *alloc_storage = (uint8_t *)::aligned_alloc(alignment, aligned_size + alignment);
assert(alloc_storage == align_up(alloc_storage, alignment));
alloc = new (alloc_storage) AllocationHeader(free);
buf.host = (uint8_t *)((uintptr_t)alloc_storage + alignment);
buf.host = alloc_storage + alignment;
return;
}
// else fall thru
Expand All @@ -921,12 +940,13 @@ class Buffer {
static_assert(alignof(AllocationHeader) <= alignof(std::max_align_t));

const size_t requested_size = align_up(size + alignment +
std::max(0, (int)sizeof(AllocationHeader) -
(int)sizeof(std::max_align_t)));
void *alloc_storage = allocate_fn(requested_size);
std::max(0, (int)sizeof(AllocationHeader) -
(int)sizeof(std::max_align_t)),
alignment);
uint8_t *alloc_storage = (uint8_t *)allocate_fn(requested_size);
alloc = new (alloc_storage) AllocationHeader(deallocate_fn);
uint8_t *unaligned_ptr = ((uint8_t *)alloc) + sizeof(AllocationHeader);
buf.host = (uint8_t *)align_up((uintptr_t)unaligned_ptr);
uint8_t *unaligned_ptr = (uint8_t *)alloc + sizeof(AllocationHeader);
buf.host = align_up(unaligned_ptr, alignment);
}

/** Drop reference to any owned host or device memory, possibly
Expand Down
8 changes: 4 additions & 4 deletions src/runtime/posix_aligned_alloc.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,9 @@ WEAK_INLINE void *halide_internal_aligned_alloc(size_t alignment, size_t size) {
// Always round allocations up to alignment size,
// so that all allocators follow the behavior of aligned_alloc() and
// return aligned pointer *and* aligned length.
const size_t aligned_size = align_up(size + alignment, alignment);
const size_t aligned_size = __builtin_align_up(size + alignment, alignment);

void *orig = ::malloc(aligned_size);
uint8_t *orig = (uint8_t *)::malloc(aligned_size);
if (orig == nullptr) {
// Will result in a failed assertion and a call to halide_error
return nullptr;
Expand All @@ -28,10 +28,10 @@ WEAK_INLINE void *halide_internal_aligned_alloc(size_t alignment, size_t size) {
// alignof(std::max_align_t); we can't reasonably check that in
// the runtime, but we can realistically assume it's at least
// 8-aligned.
halide_debug_assert(nullptr, (((uintptr_t)orig) % 8) == 0);
halide_debug_assert(nullptr, __builtin_is_aligned(orig, 8));

// We want to store the original pointer prior to the pointer we return.
void *ptr = (void *)align_up((uintptr_t)orig + sizeof(void *), alignment);
void *ptr = (void *)__builtin_align_up(orig + sizeof(void *), alignment);
((void **)ptr)[-1] = orig;
return ptr;
}
Expand Down
5 changes: 0 additions & 5 deletions src/runtime/runtime_internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -208,11 +208,6 @@ void halide_thread_yield();

} // extern "C"

template<typename T>
ALWAYS_INLINE T align_up(T p, size_t alignment) {
return (p + alignment - 1) & ~(alignment - 1);
}

template<typename T>
ALWAYS_INLINE T is_power_of_two(T value) {
return (value != 0) && ((value & (value - 1)) == 0);
Expand Down

0 comments on commit 6579c66

Please sign in to comment.