Skip to content

Commit 9bb1ebf

Browse files
committed
[MicroBenchmarks] Add matrix type benchmarks.
1 parent ff244c4 commit 9bb1ebf

File tree

3 files changed

+171
-0
lines changed

3 files changed

+171
-0
lines changed
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1 +1,2 @@
11
# Each benchmark group is configured by the CMakeLists.txt in its own
# subdirectory.
add_subdirectory(Int128)
2+
add_subdirectory(MatrixType)
Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
# Build the matrix-type benchmarks only when main.cpp can actually be compiled:
#  * the C++ compiler must accept -fenable-matrix (main.cpp is a C++ source, so
#    the flag is probed with the C++ compiler rather than the C compiler), and
#  * <simd/simd.h>, which main.cpp includes for the comparison benchmarks, must
#    be available.
include(CheckCXXCompilerFlag)
include(CheckIncludeFileCXX)

check_cxx_compiler_flag(-fenable-matrix COMPILER_HAS_MATRIX_FLAG)
check_include_file_cxx(simd/simd.h HAVE_SIMD_SIMD_H)

if (COMPILER_HAS_MATRIX_FLAG AND HAVE_SIMD_SIMD_H)
  set(CMAKE_CXX_STANDARD 20)
  set(CMAKE_CXX_STANDARD_REQUIRED TRUE)

  llvm_test_run()

  # Only main.cpp uses the matrix extension, so scope the flag to that file.
  set_property(SOURCE main.cpp PROPERTY COMPILE_FLAGS -fenable-matrix)

  llvm_test_executable(MatrixType main.cpp)
  target_link_libraries(MatrixType benchmark)
endif()
Lines changed: 157 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,157 @@
1+
#include <algorithm>
2+
#include <cstdint>
3+
#include <limits>
4+
#include <random>
5+
#include <ranges>
6+
#include <vector>
7+
8+
#include <simd/simd.h>
9+
10+
#include "benchmark/benchmark.h"
11+
#include <iostream>
12+
13+
namespace {
14+
15+
// 4x4 matrix of doubles declared with the matrix_type attribute; this source
// file is compiled with -fenable-matrix by its CMakeLists.txt.
using m44 = double __attribute__((matrix_type(4, 4)));
16+
17+
class MatrixMult4x4Benchmark : public benchmark::Fixture {
18+
public:
19+
void SetUp(const benchmark::State &) override {
20+
mats.clear();
21+
mats2.clear();
22+
mats_res.clear();
23+
mats2_res.clear();
24+
std::default_random_engine generator;
25+
std::uniform_real_distribution<double> distribution(-10.0, 10.0);
26+
for (unsigned X = 0; X < kDataSize; ++X) {
27+
m44 M;
28+
for (unsigned J = 0; J < 4; ++J)
29+
for (unsigned I = 0; I < 4; ++I)
30+
M[J][I] = distribution(generator);
31+
mats.push_back(M);
32+
mats_res.push_back(M);
33+
}
34+
for (auto &m : mats) {
35+
simd_double4x4 s;
36+
s.columns[0] = {m[0][0], m[1][0], m[2][0], m[3][0]};
37+
s.columns[1] = {m[0][1], m[1][1], m[2][1], m[3][1]};
38+
s.columns[2] = {m[0][2], m[1][2], m[2][2], m[3][2]};
39+
s.columns[3] = {m[0][3], m[1][3], m[2][3], m[3][3]};
40+
mats2.push_back(s);
41+
mats2_res.push_back(s);
42+
}
43+
}
44+
45+
protected:
46+
static constexpr size_t kDataSize = 1024;
47+
std::vector<m44> mats;
48+
std::vector<m44> mats_res;
49+
std::vector<simd_double4x4> mats2;
50+
std::vector<simd_double4x4> mats2_res;
51+
};
52+
53+
void benchCVAMatrixABStoreRes(std::vector<m44> &mats,
54+
std::vector<m44> &mats_res) {
55+
size_t N = mats.size();
56+
for (size_t i = 0u; i < N; ++i) {
57+
const m44 a = mats[i];
58+
const m44 b = mats[(i + 1) % N];
59+
const m44 prod = a * b;
60+
mats_res[i] = prod;
61+
}
62+
}
63+
64+
// One benchmark iteration = one full pass of A*B over the matrix-type data,
// with the products stored in mats_res.
BENCHMARK_F(MatrixMult4x4Benchmark, MatrixTypeAB)(benchmark::State &state) {
  for (auto _ : state)
    benchCVAMatrixABStoreRes(mats, mats_res);
}
69+
70+
void benchSIMDMatrixABStoreRes(std::vector<simd_double4x4> &mats,
71+
std::vector<simd_double4x4> &mats_res) {
72+
size_t N = mats.size();
73+
for (size_t i = 0u; i < N; ++i) {
74+
const simd_double4x4 a = mats[i];
75+
const simd_double4x4 b = mats[(i + 1) % N];
76+
const simd_double4x4 prod = matrix_multiply(a, b);
77+
mats_res[i] = prod;
78+
// benchmark::DoNotOptimize(prod);
79+
}
80+
}
81+
82+
// One benchmark iteration = one full pass of A*B over the simd data, with the
// products stored in mats2_res.
BENCHMARK_F(MatrixMult4x4Benchmark, SIMDMatrixAB)(benchmark::State &state) {
  for (auto _ : state)
    benchSIMDMatrixABStoreRes(mats2, mats2_res);
}
87+
88+
void benchCVAMatrixAtB(const std::vector<m44> &mats) {
89+
size_t N = mats.size();
90+
for (size_t i = 0u; i < N; ++i) {
91+
const m44 a = mats[i];
92+
const m44 b = mats[(i + 1) % N];
93+
const m44 prod = __builtin_matrix_transpose(a) * b;
94+
benchmark::DoNotOptimize(prod);
95+
}
96+
}
97+
98+
// One benchmark iteration = one full pass of transpose(A)*B over the
// matrix-type data; products are not stored.
BENCHMARK_F(MatrixMult4x4Benchmark, MatrixTypeAtB)(benchmark::State &state) {
  for (auto _ : state)
    benchCVAMatrixAtB(mats);
}
103+
104+
void benchSIMDMatrixAtB(const std::vector<simd_double4x4> &mats) {
105+
size_t N = mats.size();
106+
for (size_t i = 0u; i < N; ++i) {
107+
const simd_double4x4 a = mats[i];
108+
const simd_double4x4 b = mats[(i + 1) % N];
109+
const simd_double4x4 prod = matrix_multiply(simd_transpose(a), b);
110+
benchmark::DoNotOptimize(prod);
111+
}
112+
}
113+
114+
// One benchmark iteration = one full pass of transpose(A)*B over the simd
// data; products are not stored.
BENCHMARK_F(MatrixMult4x4Benchmark, SIMDMatrixAtB)(benchmark::State &state) {
  for (auto _ : state)
    benchSIMDMatrixAtB(mats2);
}
119+
120+
void benchCVAMatrixAtBStoreRes(std::vector<m44> &mats,
121+
std::vector<m44> &mats_res) {
122+
size_t N = mats.size();
123+
for (size_t i = 0u; i < N; ++i) {
124+
const m44 a = mats[i];
125+
const m44 b = mats[(i + 1) % N];
126+
const m44 prod = __builtin_matrix_transpose(a) * b;
127+
mats[i] = prod;
128+
}
129+
}
130+
131+
// One benchmark iteration = one call to benchCVAMatrixAtBStoreRes on the
// fixture's matrix-type input and result vectors.
BENCHMARK_F(MatrixMult4x4Benchmark,
            MatrixTypeAtBStoreRes)(benchmark::State &state) {
  for (auto _ : state)
    benchCVAMatrixAtBStoreRes(mats, mats_res);
}
137+
138+
void benchSIMDMatrixAtBStoreRes(std::vector<simd_double4x4> &mats,
139+
std::vector<simd_double4x4> &mats_res) {
140+
size_t N = mats.size();
141+
for (size_t i = 0u; i < N; ++i) {
142+
const simd_double4x4 a = mats[i];
143+
const simd_double4x4 b = mats[(i + 1) % N];
144+
const simd_double4x4 prod = matrix_multiply(simd_transpose(a), b);
145+
mats[i] = prod;
146+
}
147+
}
148+
149+
// One benchmark iteration = one call to benchSIMDMatrixAtBStoreRes on the
// fixture's simd input and result vectors.
BENCHMARK_F(MatrixMult4x4Benchmark,
            SIMDMatrixAtBStoreRes)(benchmark::State &state) {
  for (auto _ : state)
    benchSIMDMatrixAtBStoreRes(mats2, mats2_res);
}
155+
} // namespace
156+
157+
// Google Benchmark macro that supplies the program's main() and runs the
// benchmarks registered above.
BENCHMARK_MAIN();

0 commit comments

Comments
 (0)