Skip to content

Commit 1bf39ae

Browse files
committed
[MicroBenchmarks] Add matrix type benchmarks.
1 parent ff244c4 commit 1bf39ae

File tree

3 files changed

+163
-0
lines changed

3 files changed

+163
-0
lines changed
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1 +1,2 @@
11
# Subdirectories added to the build from this directory.
add_subdirectory(Int128)
2+
add_subdirectory(MatrixType)
Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
# Enable the matrix type benchmarks for compilers supporting -fenable-matrix.
# The flag is only ever applied to main.cpp, a C++ source, so it is probed
# with the C++ compiler rather than the C compiler.
include(CheckCXXCompilerFlag)
check_cxx_compiler_flag(-fenable-matrix COMPILER_HAS_MATRIX_FLAG)
if (COMPILER_HAS_MATRIX_FLAG)
  set(CMAKE_CXX_STANDARD 20)
  set(CMAKE_CXX_STANDARD_REQUIRED TRUE)

  llvm_test_run()

  # Scope the flag to the one source file that is compiled here.
  set_property(SOURCE main.cpp PROPERTY COMPILE_FLAGS -fenable-matrix)

  llvm_test_executable(MatrixType main.cpp)
  target_link_libraries(MatrixType benchmark)
endif()
Lines changed: 149 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,149 @@
1+
#include <algorithm>
2+
#include <cstdint>
3+
#include <limits>
4+
#include <random>
5+
#include <ranges>
6+
#include <vector>
7+
8+
#if __has_include(<simd/simd.h>)
9+
#define HAS_SIMD_HEADER 1
10+
#include <simd/simd.h>
11+
#else
12+
#define HAS_SIMD_HEADER 0
13+
#endif
14+
15+
#include "benchmark/benchmark.h"
16+
#include <iostream>
17+
18+
namespace {
19+
20+
// 4x4 matrix of doubles declared through the matrix_type attribute.
// NOTE(review): the accompanying CMake script compiles main.cpp with
// -fenable-matrix; presumably this is that file and the flag is what makes
// the attribute available.
using m44 = double __attribute__((matrix_type(4, 4)));
21+
22+
class MatrixMult4x4Benchmark : public benchmark::Fixture {
23+
public:
24+
void SetUp(const benchmark::State &) override {
25+
std::default_random_engine generator;
26+
std::uniform_real_distribution<double> distribution(-10.0, 10.0);
27+
28+
mats.clear();
29+
mats_res.clear();
30+
for (unsigned X = 0; X < kDataSize; ++X) {
31+
m44 M;
32+
for (unsigned J = 0; J < 4; ++J)
33+
for (unsigned I = 0; I < 4; ++I)
34+
M[J][I] = distribution(generator);
35+
mats.push_back(M);
36+
mats_res.push_back(M);
37+
}
38+
39+
#ifdef HAS_SIMD_HEADER
40+
mats_simd_res.clear();
41+
mats_simd.clear();
42+
for (auto &m : mats) {
43+
simd_double4x4 s;
44+
s.columns[0] = {m[0][0], m[1][0], m[2][0], m[3][0]};
45+
s.columns[1] = {m[0][1], m[1][1], m[2][1], m[3][1]};
46+
s.columns[2] = {m[0][2], m[1][2], m[2][2], m[3][2]};
47+
s.columns[3] = {m[0][3], m[1][3], m[2][3], m[3][3]};
48+
mats_simd.push_back(s);
49+
mats_simd_res.push_back(s);
50+
}
51+
#endif
52+
}
53+
54+
protected:
55+
static constexpr size_t kDataSize = 1024;
56+
std::vector<m44> mats;
57+
std::vector<m44> mats_res;
58+
#ifdef HAS_SIMD_HEADER
59+
std::vector<simd_double4x4> mats_simd;
60+
std::vector<simd_double4x4> mats_simd_res;
61+
#endif
62+
};
63+
64+
/// Benchmarks A * B using the matrix type.
///
/// Every pass multiplies each matrix in `mats` by its successor (the last
/// element wraps around to the first) and writes the product into the
/// matching slot of `mats_res`.
BENCHMARK_F(MatrixMult4x4Benchmark, MatrixTypeAB)(benchmark::State &state) {
  while (state.KeepRunning()) {
    const size_t count = mats.size();
    for (size_t idx = 0u; idx < count; ++idx) {
      const size_t next = (idx + 1) % count;
      mats_res[idx] = mats[idx] * mats[next];
    }
    // Memory barrier after the pass so the stores to mats_res are kept.
    benchmark::ClobberMemory();
  }
}
76+
77+
// A * B computed with matrix_multiply on simd_double4x4 values; mirrors
// MatrixTypeAB. HAS_SIMD_HEADER is always defined (as 0 or 1), so the guard
// tests its value: `#ifdef` would keep this code even without <simd/simd.h>.
#if HAS_SIMD_HEADER
BENCHMARK_F(MatrixMult4x4Benchmark, SIMDMatrixAB)(benchmark::State &state) {
  while (state.KeepRunning()) {
    size_t N = mats.size();
    for (size_t i = 0u; i < N; ++i) {
      const simd_double4x4 a = mats_simd[i];
      // The last element is paired with the first one.
      const simd_double4x4 b = mats_simd[(i + 1) % N];
      const simd_double4x4 prod = matrix_multiply(a, b);
      mats_simd_res[i] = prod;
    }
    // Memory barrier after the pass so the stores to mats_simd_res are kept.
    benchmark::ClobberMemory();
  }
}
#endif
91+
92+
/// Benchmarks transpose(A) * B using the matrix type, without storing the
/// product.
///
/// Each matrix in `mats` is transposed and multiplied by its successor (the
/// last element wraps around to the first); the product is only handed to
/// DoNotOptimize.
BENCHMARK_F(MatrixMult4x4Benchmark, MatrixTypeAtB)(benchmark::State &state) {
  while (state.KeepRunning()) {
    const size_t count = mats.size();
    for (size_t idx = 0u; idx < count; ++idx) {
      const size_t next = (idx + 1) % count;
      const m44 prod = __builtin_matrix_transpose(mats[idx]) * mats[next];
      benchmark::DoNotOptimize(prod);
    }
  }
}
103+
104+
// transpose(A) * B computed with simd_transpose and matrix_multiply; mirrors
// MatrixTypeAtB. HAS_SIMD_HEADER is always defined (as 0 or 1), so the guard
// tests its value: `#ifdef` would keep this code even without <simd/simd.h>.
#if HAS_SIMD_HEADER
BENCHMARK_F(MatrixMult4x4Benchmark, SIMDMatrixAtB)(benchmark::State &state) {
  while (state.KeepRunning()) {
    size_t N = mats.size();
    for (size_t i = 0u; i < N; ++i) {
      const simd_double4x4 a = mats_simd[i];
      // The last element is paired with the first one.
      const simd_double4x4 b = mats_simd[(i + 1) % N];
      const simd_double4x4 prod = matrix_multiply(simd_transpose(a), b);
      // The product is not stored anywhere; it is only handed to DoNotOptimize.
      benchmark::DoNotOptimize(prod);
    }
  }
}
#endif
117+
118+
/// Benchmarks transpose(A) * B using the matrix type and stores each product.
///
/// Products are written to `mats_res`, matching MatrixTypeAB. Writing them
/// back into `mats` would make every pass consume the previous pass's
/// products, so the values would compound until they overflow to inf/NaN and
/// the timed work would no longer run on the data generated in SetUp().
BENCHMARK_F(MatrixMult4x4Benchmark,
            MatrixTypeAtBStoreRes)(benchmark::State &state) {
  while (state.KeepRunning()) {
    size_t N = mats.size();
    for (size_t i = 0u; i < N; ++i) {
      const m44 a = mats[i];
      // The last element is paired with the first one.
      const m44 b = mats[(i + 1) % N];
      const m44 prod = __builtin_matrix_transpose(a) * b;
      mats_res[i] = prod;
    }
    // Memory barrier after the pass so the stores to mats_res are kept.
    benchmark::ClobberMemory();
  }
}
131+
132+
// transpose(A) * B computed with simd_transpose and matrix_multiply, storing
// each product; mirrors MatrixTypeAtBStoreRes.
//
// HAS_SIMD_HEADER is always defined (as 0 or 1), so the guard tests its
// value: `#ifdef` would keep this code even without <simd/simd.h>.
//
// Products are written to mats_simd_res rather than back into mats_simd:
// overwriting the inputs would make every pass consume the previous pass's
// products, so the values would compound until they overflow to inf/NaN.
#if HAS_SIMD_HEADER
BENCHMARK_F(MatrixMult4x4Benchmark,
            SIMDMatrixAtBStoreRes)(benchmark::State &state) {
  while (state.KeepRunning()) {
    size_t N = mats.size();
    for (size_t i = 0u; i < N; ++i) {
      const simd_double4x4 a = mats_simd[i];
      // The last element is paired with the first one.
      const simd_double4x4 b = mats_simd[(i + 1) % N];
      const simd_double4x4 prod = matrix_multiply(simd_transpose(a), b);
      mats_simd_res[i] = prod;
    }
    // Memory barrier after the pass so the stores to mats_simd_res are kept.
    benchmark::ClobberMemory();
  }
}
#endif
147+
} // namespace
148+
149+
// Program entry point, supplied by Google Benchmark's BENCHMARK_MAIN macro.
BENCHMARK_MAIN();

0 commit comments

Comments
 (0)