From 78e27b30686478285bff51a3afc7552ff526a1e9 Mon Sep 17 00:00:00 2001 From: "A. Jiang" Date: Sat, 2 Mar 2024 23:21:25 +0800 Subject: [PATCH 1/6] Unify definitions of `_Allocate` and `_Deallocate` --- stl/inc/xmemory | 127 +++++++++++++++++++++++------------------------- 1 file changed, 61 insertions(+), 66 deletions(-) diff --git a/stl/inc/xmemory b/stl/inc/xmemory index eeaf3294e6..92b931cd24 100644 --- a/stl/inc/xmemory +++ b/stl/inc/xmemory @@ -190,98 +190,93 @@ inline void _Adjust_manually_vector_aligned(void*& _Ptr, size_t& _Bytes) { } #endif // defined(_M_IX86) || defined(_M_X64) -#ifdef __cpp_aligned_new -template __STDCPP_DEFAULT_NEW_ALIGNMENT__), int> = 0> +template __declspec(allocator) _CONSTEXPR20 void* _Allocate(const size_t _Bytes) { - // allocate _Bytes when __cpp_aligned_new && _Align > __STDCPP_DEFAULT_NEW_ALIGNMENT__ - if (_Bytes == 0) { - return nullptr; - } + // allocate _Bytes +#ifdef __cpp_aligned_new + if constexpr (_Align > __STDCPP_DEFAULT_NEW_ALIGNMENT__) { + if (_Bytes == 0) { + return nullptr; + } #if _HAS_CXX20 // TRANSITION, GH-1532 - if (_STD is_constant_evaluated()) { - return _Traits::_Allocate(_Bytes); - } else + if (_STD is_constant_evaluated()) { + return _Traits::_Allocate(_Bytes); + } else #endif // _HAS_CXX20 - { - size_t _Passed_align = _Align; + { + size_t _Passed_align = _Align; #if defined(_M_IX86) || defined(_M_X64) - if (_Bytes >= _Big_allocation_threshold) { - // boost the alignment of big allocations to help autovectorization - _Passed_align = (_STD max)(_Align, _Big_allocation_alignment); - } + if (_Bytes >= _Big_allocation_threshold) { + // boost the alignment of big allocations to help autovectorization + _Passed_align = (_STD max)(_Align, _Big_allocation_alignment); + } #endif // defined(_M_IX86) || defined(_M_X64) - return _Traits::_Allocate_aligned(_Bytes, _Passed_align); - } -} - -template __STDCPP_DEFAULT_NEW_ALIGNMENT__), int> = 0> -_CONSTEXPR20 void _Deallocate(void* _Ptr, const size_t _Bytes) noexcept { - // deallocate storage allocated by _Allocate when __cpp_aligned_new && _Align > __STDCPP_DEFAULT_NEW_ALIGNMENT__ -#if _HAS_CXX20 // TRANSITION, GH-1532 - if (_STD is_constant_evaluated()) { - ::operator delete(_Ptr); + return _Traits::_Allocate_aligned(_Bytes, _Passed_align); + } } else -#endif // _HAS_CXX20 +#endif // defined(__cpp_aligned_new) { - size_t _Passed_align = _Align; -#if defined(_M_IX86) || defined(_M_X64) - if (_Bytes >= _Big_allocation_threshold) { // boost the alignment of big allocations to help autovectorization - _Passed_align = (_STD max)(_Align, _Big_allocation_alignment); - } -#endif // defined(_M_IX86) || defined(_M_X64) - ::operator delete(_Ptr, _Bytes, align_val_t{_Passed_align}); - } -} - -#define _HAS_ALIGNED_NEW 1 -#else // ^^^ defined(__cpp_aligned_new) / !defined(__cpp_aligned_new) vvv -#define _HAS_ALIGNED_NEW 0 -#endif // ^^^ !defined(__cpp_aligned_new) ^^^ - -template = 0> -__declspec(allocator) _CONSTEXPR20 void* _Allocate(const size_t _Bytes) { - // allocate _Bytes when !_HAS_ALIGNED_NEW || _Align <= __STDCPP_DEFAULT_NEW_ALIGNMENT__ #if defined(_M_IX86) || defined(_M_X64) #if _HAS_CXX20 // TRANSITION, GH-1532 - if (!_STD is_constant_evaluated()) + if (!_STD is_constant_evaluated()) #endif // _HAS_CXX20 - { - if (_Bytes >= _Big_allocation_threshold) { // boost the alignment of big allocations to help autovectorization - return _Allocate_manually_vector_aligned<_Traits>(_Bytes); + { + if (_Bytes >= _Big_allocation_threshold) { + // boost the alignment of big allocations to help autovectorization + return _Allocate_manually_vector_aligned<_Traits>(_Bytes); + } } - } #endif // defined(_M_IX86) || defined(_M_X64) - if (_Bytes != 0) { - return _Traits::_Allocate(_Bytes); - } + if (_Bytes != 0) { + return _Traits::_Allocate(_Bytes); + } - return nullptr; + return nullptr; + } } -template = 0> +template _CONSTEXPR20 void _Deallocate(void* _Ptr, size_t _Bytes) noexcept { - // deallocate storage allocated by _Allocate when !_HAS_ALIGNED_NEW || _Align <= __STDCPP_DEFAULT_NEW_ALIGNMENT__ + // deallocate storage allocated by _Allocate +#ifdef __cpp_aligned_new + if constexpr (_Align > __STDCPP_DEFAULT_NEW_ALIGNMENT__) { #if _HAS_CXX20 // TRANSITION, GH-1532 - if (_STD is_constant_evaluated()) { - ::operator delete(_Ptr); - } else + if (_STD is_constant_evaluated()) { + ::operator delete(_Ptr); + } else #endif // _HAS_CXX20 - { + { + size_t _Passed_align = _Align; #if defined(_M_IX86) || defined(_M_X64) - if (_Bytes >= _Big_allocation_threshold) { // boost the alignment of big allocations to help autovectorization - _Adjust_manually_vector_aligned(_Ptr, _Bytes); + if (_Bytes >= _Big_allocation_threshold) { + // boost the alignment of big allocations to help autovectorization + _Passed_align = (_STD max)(_Align, _Big_allocation_alignment); + } +#endif // defined(_M_IX86) || defined(_M_X64) + ::operator delete(_Ptr, _Bytes, align_val_t{_Passed_align}); } + } else +#endif // defined(__cpp_aligned_new) + { +#if _HAS_CXX20 // TRANSITION, GH-1532 + if (_STD is_constant_evaluated()) { + ::operator delete(_Ptr); + } else +#endif // _HAS_CXX20 + { +#if defined(_M_IX86) || defined(_M_X64) + if (_Bytes >= _Big_allocation_threshold) { + // boost the alignment of big allocations to help autovectorization + _Adjust_manually_vector_aligned(_Ptr, _Bytes); + } #endif // defined(_M_IX86) || defined(_M_X64) - ::operator delete(_Ptr, _Bytes); + ::operator delete(_Ptr, _Bytes); + } } } -#undef _HAS_ALIGNED_NEW - template _Ty* _Global_new(_Types&&... _Args) { // acts as "new" while disallowing user overload selection struct _NODISCARD _Guard_type { From ed5a6e77a0ebc73ced06b15359419fd4b6b6ab8c Mon Sep 17 00:00:00 2001 From: "Stephan T. Lavavej" Date: Tue, 5 Mar 2024 14:28:26 -0800 Subject: [PATCH 2/6] In `_Deallocate`, extract the `is_constant_evaluated` codepath with an early `return`. --- stl/inc/xmemory | 43 ++++++++++++++++++------------------------- 1 file changed, 18 insertions(+), 25 deletions(-) diff --git a/stl/inc/xmemory b/stl/inc/xmemory index 92b931cd24..5dc165f261 100644 --- a/stl/inc/xmemory +++ b/stl/inc/xmemory @@ -240,40 +240,33 @@ __declspec(allocator) _CONSTEXPR20 void* _Allocate(const size_t _Bytes) { template _CONSTEXPR20 void _Deallocate(void* _Ptr, size_t _Bytes) noexcept { // deallocate storage allocated by _Allocate -#ifdef __cpp_aligned_new - if constexpr (_Align > __STDCPP_DEFAULT_NEW_ALIGNMENT__) { #if _HAS_CXX20 // TRANSITION, GH-1532 - if (_STD is_constant_evaluated()) { - ::operator delete(_Ptr); - } else + if (_STD is_constant_evaluated()) { + ::operator delete(_Ptr); + return; + } #endif // _HAS_CXX20 - { - size_t _Passed_align = _Align; + +#ifdef __cpp_aligned_new + if constexpr (_Align > __STDCPP_DEFAULT_NEW_ALIGNMENT__) { + size_t _Passed_align = _Align; #if defined(_M_IX86) || defined(_M_X64) - if (_Bytes >= _Big_allocation_threshold) { - // boost the alignment of big allocations to help autovectorization - _Passed_align = (_STD max)(_Align, _Big_allocation_alignment); - } -#endif // defined(_M_IX86) || defined(_M_X64) - ::operator delete(_Ptr, _Bytes, align_val_t{_Passed_align}); + if (_Bytes >= _Big_allocation_threshold) { + // boost the alignment of big allocations to help autovectorization + _Passed_align = (_STD max)(_Align, _Big_allocation_alignment); } +#endif // defined(_M_IX86) || defined(_M_X64) + ::operator delete(_Ptr, _Bytes, align_val_t{_Passed_align}); } else #endif // defined(__cpp_aligned_new) { -#if _HAS_CXX20 // TRANSITION, GH-1532 - if (_STD is_constant_evaluated()) { - ::operator delete(_Ptr); - } else -#endif // _HAS_CXX20 - { #if defined(_M_IX86) || defined(_M_X64) - if (_Bytes >= _Big_allocation_threshold) { - // boost the alignment of big allocations to help autovectorization - _Adjust_manually_vector_aligned(_Ptr, _Bytes); - } -#endif // defined(_M_IX86) || defined(_M_X64) - ::operator delete(_Ptr, _Bytes); + if (_Bytes >= _Big_allocation_threshold) { + // boost the alignment of big allocations to help autovectorization + _Adjust_manually_vector_aligned(_Ptr, _Bytes); } +#endif // defined(_M_IX86) || defined(_M_X64) + ::operator delete(_Ptr, _Bytes); } } From 916c934b4fee421b21a95d9b06aeb88e974e361d Mon Sep 17 00:00:00 2001 From: "Stephan T. Lavavej" Date: Tue, 5 Mar 2024 14:40:36 -0800 Subject: [PATCH 3/6] In `_Allocate`, extract `if (_Bytes == 0) { return nullptr; }`. When `_Bytes == 0`, the branch `_Bytes >= _Big_allocation_threshold` will certainly not be taken, so we can perform this transformation. --- stl/inc/xmemory | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) diff --git a/stl/inc/xmemory b/stl/inc/xmemory index 5dc165f261..61c2c37826 100644 --- a/stl/inc/xmemory +++ b/stl/inc/xmemory @@ -193,12 +193,12 @@ inline void _Adjust_manually_vector_aligned(void*& _Ptr, size_t& _Bytes) { template __declspec(allocator) _CONSTEXPR20 void* _Allocate(const size_t _Bytes) { // allocate _Bytes + if (_Bytes == 0) { + return nullptr; + } + #ifdef __cpp_aligned_new if constexpr (_Align > __STDCPP_DEFAULT_NEW_ALIGNMENT__) { - if (_Bytes == 0) { - return nullptr; - } - #if _HAS_CXX20 // TRANSITION, GH-1532 if (_STD is_constant_evaluated()) { return _Traits::_Allocate(_Bytes); @@ -228,12 +228,7 @@ __declspec(allocator) _CONSTEXPR20 void* _Allocate(const size_t _Bytes) { } } #endif // defined(_M_IX86) || defined(_M_X64) - - if (_Bytes != 0) { - return _Traits::_Allocate(_Bytes); - } - - return nullptr; + return _Traits::_Allocate(_Bytes); } } From 888d6c70a89675923710d0f813daefa1e6b9676a Mon Sep 17 00:00:00 2001 From: "Stephan T. Lavavej" Date: Tue, 5 Mar 2024 14:46:03 -0800 Subject: [PATCH 4/6] In `_Allocate`, after `if`-`return` we don't need `else`. --- stl/inc/xmemory | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/stl/inc/xmemory b/stl/inc/xmemory index 61c2c37826..f458817cb3 100644 --- a/stl/inc/xmemory +++ b/stl/inc/xmemory @@ -202,18 +202,16 @@ __declspec(allocator) _CONSTEXPR20 void* _Allocate(const size_t _Bytes) { #if _HAS_CXX20 // TRANSITION, GH-1532 if (_STD is_constant_evaluated()) { return _Traits::_Allocate(_Bytes); - } else + } #endif // _HAS_CXX20 - { - size_t _Passed_align = _Align; + size_t _Passed_align = _Align; #if defined(_M_IX86) || defined(_M_X64) - if (_Bytes >= _Big_allocation_threshold) { - // boost the alignment of big allocations to help autovectorization - _Passed_align = (_STD max)(_Align, _Big_allocation_alignment); - } -#endif // defined(_M_IX86) || defined(_M_X64) - return _Traits::_Allocate_aligned(_Bytes, _Passed_align); + if (_Bytes >= _Big_allocation_threshold) { + // boost the alignment of big allocations to help autovectorization + _Passed_align = (_STD max)(_Align, _Big_allocation_alignment); } +#endif // defined(_M_IX86) || defined(_M_X64) + return _Traits::_Allocate_aligned(_Bytes, _Passed_align); } else #endif // defined(__cpp_aligned_new) { From 37afe4fc64f0d853a4d6ce46c6e964ef004a4e50 Mon Sep 17 00:00:00 2001 From: "Stephan T. Lavavej" Date: Tue, 5 Mar 2024 14:52:19 -0800 Subject: [PATCH 5/6] In `_Allocate`, flip a test to early return `if (_STD is_constant_evaluated())`. --- stl/inc/xmemory | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/stl/inc/xmemory b/stl/inc/xmemory index f458817cb3..41bf756f40 100644 --- a/stl/inc/xmemory +++ b/stl/inc/xmemory @@ -217,13 +217,13 @@ __declspec(allocator) _CONSTEXPR20 void* _Allocate(const size_t _Bytes) { { #if defined(_M_IX86) || defined(_M_X64) #if _HAS_CXX20 // TRANSITION, GH-1532 - if (!_STD is_constant_evaluated()) + if (_STD is_constant_evaluated()) { + return _Traits::_Allocate(_Bytes); + } #endif // _HAS_CXX20 - { - if (_Bytes >= _Big_allocation_threshold) { - // boost the alignment of big allocations to help autovectorization - return _Allocate_manually_vector_aligned<_Traits>(_Bytes); - } + if (_Bytes >= _Big_allocation_threshold) { + // boost the alignment of big allocations to help autovectorization + return _Allocate_manually_vector_aligned<_Traits>(_Bytes); } #endif // defined(_M_IX86) || defined(_M_X64) return _Traits::_Allocate(_Bytes); From 0ef06dcb0930e1d8bad4df45e2275bc240452717 Mon Sep 17 00:00:00 2001 From: "Stephan T. Lavavej" Date: Tue, 5 Mar 2024 15:03:34 -0800 Subject: [PATCH 6/6] In `_Allocate`, extract an early return for `is_constant_evaluated`. This transformation has very minor impact. Sometimes, we'll call `is_constant_evaluated` when we would have returned `_Traits::_Allocate(_Bytes)` anyways. (This happens when we don't take the high-`_Align` branch and we aren't x86/x64.) This is an extremely minor cost, and it completely vanishes in release mode. The code simplicity is well worth it. --- stl/inc/xmemory | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/stl/inc/xmemory b/stl/inc/xmemory index 41bf756f40..edfae5bb95 100644 --- a/stl/inc/xmemory +++ b/stl/inc/xmemory @@ -197,13 +197,14 @@ __declspec(allocator) _CONSTEXPR20 void* _Allocate(const size_t _Bytes) { return nullptr; } -#ifdef __cpp_aligned_new - if constexpr (_Align > __STDCPP_DEFAULT_NEW_ALIGNMENT__) { #if _HAS_CXX20 // TRANSITION, GH-1532 - if (_STD is_constant_evaluated()) { - return _Traits::_Allocate(_Bytes); - } + if (_STD is_constant_evaluated()) { + return _Traits::_Allocate(_Bytes); + } #endif // _HAS_CXX20 + +#ifdef __cpp_aligned_new + if constexpr (_Align > __STDCPP_DEFAULT_NEW_ALIGNMENT__) { size_t _Passed_align = _Align; #if defined(_M_IX86) || defined(_M_X64) if (_Bytes >= _Big_allocation_threshold) { @@ -216,11 +217,6 @@ __declspec(allocator) _CONSTEXPR20 void* _Allocate(const size_t _Bytes) { #endif // defined(__cpp_aligned_new) { #if defined(_M_IX86) || defined(_M_X64) -#if _HAS_CXX20 // TRANSITION, GH-1532 - if (_STD is_constant_evaluated()) { - return _Traits::_Allocate(_Bytes); - } -#endif // _HAS_CXX20 if (_Bytes >= _Big_allocation_threshold) { // boost the alignment of big allocations to help autovectorization return _Allocate_manually_vector_aligned<_Traits>(_Bytes);