From 1bf39ae942ce1f781e2a69c90b904d3ca8e33820 Mon Sep 17 00:00:00 2001
From: Florian Hahn
Date: Wed, 23 Apr 2025 11:32:35 +0100
Subject: [PATCH] [MicroBenchmarks] Add matrix type benchmarks.

---
 MicroBenchmarks/Builtins/CMakeLists.txt       |   1 +
 .../Builtins/MatrixType/CMakeLists.txt        |  16 ++
 MicroBenchmarks/Builtins/MatrixType/main.cpp  | 168 ++++++++++++++++++
 3 files changed, 185 insertions(+)
 create mode 100644 MicroBenchmarks/Builtins/MatrixType/CMakeLists.txt
 create mode 100644 MicroBenchmarks/Builtins/MatrixType/main.cpp

diff --git a/MicroBenchmarks/Builtins/CMakeLists.txt b/MicroBenchmarks/Builtins/CMakeLists.txt
index dfeac9ae3a..f3c31889a6 100644
--- a/MicroBenchmarks/Builtins/CMakeLists.txt
+++ b/MicroBenchmarks/Builtins/CMakeLists.txt
@@ -1 +1,2 @@
 add_subdirectory(Int128)
+add_subdirectory(MatrixType)
diff --git a/MicroBenchmarks/Builtins/MatrixType/CMakeLists.txt b/MicroBenchmarks/Builtins/MatrixType/CMakeLists.txt
new file mode 100644
index 0000000000..27b3d054bd
--- /dev/null
+++ b/MicroBenchmarks/Builtins/MatrixType/CMakeLists.txt
@@ -0,0 +1,16 @@
+# Enable matrix types benchmarks for compilers supporting -fenable-matrix.
+# main.cpp is C++, so the flag is probed with the C++ compiler.
+include(CheckCXXCompilerFlag)
+check_cxx_compiler_flag(-fenable-matrix COMPILER_HAS_MATRIX_FLAG)
+if (COMPILER_HAS_MATRIX_FLAG)
+  set(CMAKE_CXX_STANDARD 20)
+  set(CMAKE_CXX_STANDARD_REQUIRED TRUE)
+
+  llvm_test_run()
+
+  # Only main.cpp uses the matrix extension, so scope the flag to that file.
+  set_property(SOURCE main.cpp PROPERTY COMPILE_FLAGS -fenable-matrix)
+
+  llvm_test_executable(MatrixType main.cpp)
+  target_link_libraries(MatrixType benchmark)
+endif()
diff --git a/MicroBenchmarks/Builtins/MatrixType/main.cpp b/MicroBenchmarks/Builtins/MatrixType/main.cpp
new file mode 100644
index 0000000000..60325dd427
--- /dev/null
+++ b/MicroBenchmarks/Builtins/MatrixType/main.cpp
@@ -0,0 +1,168 @@
+// Microbenchmarks for 4x4 double matrix products using Clang's matrix type
+// extension (-fenable-matrix). When <simd/simd.h> is available, the same
+// operations are also benchmarked with simd_double4x4 for comparison.
+
+#include <cstddef>
+#include <random>
+#include <vector>
+
+#if __has_include(<simd/simd.h>)
+#define HAS_SIMD_HEADER 1
+#include <simd/simd.h>
+#else
+#define HAS_SIMD_HEADER 0
+#endif
+
+#include "benchmark/benchmark.h"
+
+namespace {
+
+// 4x4 matrix of doubles using the Clang matrix type extension.
+using m44 = double __attribute__((matrix_type(4, 4)));
+
+// Fixture owning the input matrices and the result buffers.
+class MatrixMult4x4Benchmark : public benchmark::Fixture {
+public:
+  // Refills the inputs with kDataSize matrices of values in [-10, 10). The
+  // engine is default-constructed, so every call produces the same sequence.
+  void SetUp(const benchmark::State &) override {
+    std::default_random_engine generator;
+    std::uniform_real_distribution<double> distribution(-10.0, 10.0);
+
+    mats.clear();
+    mats_res.clear();
+    mats.reserve(kDataSize);
+    mats_res.reserve(kDataSize);
+    for (size_t X = 0; X < kDataSize; ++X) {
+      m44 M;
+      for (unsigned J = 0; J < 4; ++J)
+        for (unsigned I = 0; I < 4; ++I)
+          M[J][I] = distribution(generator);
+      mats.push_back(M);
+      mats_res.push_back(M);
+    }
+
+    // HAS_SIMD_HEADER is always defined (to 0 or 1), so test its value with
+    // #if; #ifdef would compile the SIMD code even without <simd/simd.h>.
+#if HAS_SIMD_HEADER
+    mats_simd_res.clear();
+    mats_simd.clear();
+    mats_simd.reserve(mats.size());
+    mats_simd_res.reserve(mats.size());
+    for (auto &m : mats) {
+      // columns[c] receives elements m[0][c] .. m[3][c].
+      simd_double4x4 s;
+      s.columns[0] = {m[0][0], m[1][0], m[2][0], m[3][0]};
+      s.columns[1] = {m[0][1], m[1][1], m[2][1], m[3][1]};
+      s.columns[2] = {m[0][2], m[1][2], m[2][2], m[3][2]};
+      s.columns[3] = {m[0][3], m[1][3], m[2][3], m[3][3]};
+      mats_simd.push_back(s);
+      mats_simd_res.push_back(s);
+    }
+#endif
+  }
+
+protected:
+  static constexpr size_t kDataSize = 1024;
+  std::vector<m44> mats;     // benchmark inputs
+  std::vector<m44> mats_res; // destination of the result-storing benchmarks
+#if HAS_SIMD_HEADER
+  std::vector<simd_double4x4> mats_simd;
+  std::vector<simd_double4x4> mats_simd_res;
+#endif
+};
+
+// A * B with the matrix type; each product is stored to mats_res.
+BENCHMARK_F(MatrixMult4x4Benchmark, MatrixTypeAB)(benchmark::State &state) {
+  for (auto _ : state) {
+    const size_t N = mats.size();
+    for (size_t i = 0u; i < N; ++i) {
+      const m44 a = mats[i];
+      const m44 b = mats[(i + 1) % N];
+      const m44 prod = a * b;
+      mats_res[i] = prod;
+    }
+    benchmark::ClobberMemory();
+  }
+}
+
+#if HAS_SIMD_HEADER
+// A * B with simd_double4x4; each product is stored to mats_simd_res.
+BENCHMARK_F(MatrixMult4x4Benchmark, SIMDMatrixAB)(benchmark::State &state) {
+  for (auto _ : state) {
+    const size_t N = mats.size();
+    for (size_t i = 0u; i < N; ++i) {
+      const simd_double4x4 a = mats_simd[i];
+      const simd_double4x4 b = mats_simd[(i + 1) % N];
+      const simd_double4x4 prod = matrix_multiply(a, b);
+      mats_simd_res[i] = prod;
+    }
+    benchmark::ClobberMemory();
+  }
+}
+#endif
+
+// transpose(A) * B with the matrix type; the product is not stored.
+BENCHMARK_F(MatrixMult4x4Benchmark, MatrixTypeAtB)(benchmark::State &state) {
+  for (auto _ : state) {
+    const size_t N = mats.size();
+    for (size_t i = 0u; i < N; ++i) {
+      const m44 a = mats[i];
+      const m44 b = mats[(i + 1) % N];
+      const m44 prod = __builtin_matrix_transpose(a) * b;
+      benchmark::DoNotOptimize(prod);
+    }
+  }
+}
+
+#if HAS_SIMD_HEADER
+// transpose(A) * B with simd_double4x4; the product is not stored.
+BENCHMARK_F(MatrixMult4x4Benchmark, SIMDMatrixAtB)(benchmark::State &state) {
+  for (auto _ : state) {
+    const size_t N = mats.size();
+    for (size_t i = 0u; i < N; ++i) {
+      const simd_double4x4 a = mats_simd[i];
+      const simd_double4x4 b = mats_simd[(i + 1) % N];
+      const simd_double4x4 prod = matrix_multiply(simd_transpose(a), b);
+      benchmark::DoNotOptimize(prod);
+    }
+  }
+}
+#endif
+
+// transpose(A) * B with the matrix type. Products go to mats_res rather than
+// back into mats, so the inputs stay unchanged across benchmark iterations.
+BENCHMARK_F(MatrixMult4x4Benchmark,
+            MatrixTypeAtBStoreRes)(benchmark::State &state) {
+  for (auto _ : state) {
+    const size_t N = mats.size();
+    for (size_t i = 0u; i < N; ++i) {
+      const m44 a = mats[i];
+      const m44 b = mats[(i + 1) % N];
+      const m44 prod = __builtin_matrix_transpose(a) * b;
+      mats_res[i] = prod;
+    }
+    benchmark::ClobberMemory();
+  }
+}
+
+#if HAS_SIMD_HEADER
+// transpose(A) * B with simd_double4x4. Products go to mats_simd_res rather
+// than back into mats_simd, so the inputs stay unchanged across iterations.
+BENCHMARK_F(MatrixMult4x4Benchmark,
+            SIMDMatrixAtBStoreRes)(benchmark::State &state) {
+  for (auto _ : state) {
+    const size_t N = mats.size();
+    for (size_t i = 0u; i < N; ++i) {
+      const simd_double4x4 a = mats_simd[i];
+      const simd_double4x4 b = mats_simd[(i + 1) % N];
+      const simd_double4x4 prod = matrix_multiply(simd_transpose(a), b);
+      mats_simd_res[i] = prod;
+    }
+    benchmark::ClobberMemory();
+  }
+}
+#endif
+} // namespace
+
+BENCHMARK_MAIN();