From e6e519b37fa33f7e20615d0ab41b24be0feba3a6 Mon Sep 17 00:00:00 2001 From: Mikhail Katliar Date: Sun, 4 Aug 2024 14:14:10 +0200 Subject: [PATCH] - Computational complexity calculated in the same way for all gemm tests - No hard-coded alpha and beta in static gemm tests --- bench/analysis/dgemm_performance.py | 2 ++ bench/analysis/dgemm_performance_ratio.py | 2 ++ bench/blas/Gemm.cpp | 10 ++++------ bench/blasfeo/Gemm.cpp | 3 +-- bench/blast/math/dense/DynamicGemm.cpp | 13 +++++++------ bench/blast/math/dense/StaticGemm.cpp | 5 +++-- bench/blast/math/panel/DynamicGemm.cpp | 4 ++-- bench/blast/math/panel/StaticGemm.cpp | 5 ++--- bench/blaze/Gemm.cpp | 8 ++++---- bench/eigen/Gemm.cpp | 8 +++----- bench/libxsmm/Gemm.cpp | 15 ++++++++------- include/bench/Gemm.hpp | 14 ++++++++++++++ 12 files changed, 52 insertions(+), 37 deletions(-) diff --git a/bench/analysis/dgemm_performance.py b/bench/analysis/dgemm_performance.py index 2998639a..61c51277 100644 --- a/bench/analysis/dgemm_performance.py +++ b/bench/analysis/dgemm_performance.py @@ -1,3 +1,5 @@ +import matplotlib +matplotlib.use("Agg") import matplotlib.pyplot as plt import json diff --git a/bench/analysis/dgemm_performance_ratio.py b/bench/analysis/dgemm_performance_ratio.py index bb2ee21e..d8addef9 100644 --- a/bench/analysis/dgemm_performance_ratio.py +++ b/bench/analysis/dgemm_performance_ratio.py @@ -1,3 +1,5 @@ +import matplotlib +matplotlib.use('Agg') import matplotlib.pyplot as plt import json diff --git a/bench/blas/Gemm.cpp b/bench/blas/Gemm.cpp index b6140e83..5fcab5ef 100644 --- a/bench/blas/Gemm.cpp +++ b/bench/blas/Gemm.cpp @@ -2,9 +2,7 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. - - -#include +#include #include @@ -31,10 +29,10 @@ namespace blast :: benchmark for (auto _ : state) gemm(C, trans(A), B, 1.0, 1.0); - state.counters["flops"] = Counter(2 * m * m * m, Counter::kIsIterationInvariantRate); + setCounters(state.counters, complexityGemm(m, m, m)); state.counters["m"] = m; } - BENCHMARK_TEMPLATE(BM_gemm, double)->DenseRange(1, 50); - BENCHMARK_TEMPLATE(BM_gemm, float)->DenseRange(1, 50); + BENCHMARK_TEMPLATE(BM_gemm, double)->DenseRange(1, BENCHMARK_MAX_GEMM); + BENCHMARK_TEMPLATE(BM_gemm, float)->DenseRange(1, BENCHMARK_MAX_GEMM); } diff --git a/bench/blasfeo/Gemm.cpp b/bench/blasfeo/Gemm.cpp index f3635cb0..4ba471b8 100644 --- a/bench/blasfeo/Gemm.cpp +++ b/bench/blasfeo/Gemm.cpp @@ -7,7 +7,6 @@ #include #include -#include namespace blast :: benchmark @@ -53,7 +52,7 @@ namespace blast :: benchmark for (auto _ : state) gemm_nt(m, n, k, 1., A, 0, 0, B, 0, 0, 1., C, 0, 0, C, 0, 0); - state.counters["flops"] = Counter(2 * m * n * k + 3 * m * n, Counter::kIsIterationInvariantRate); + setCounters(state.counters, complexityGemm(m, n, k)); state.counters["m"] = m; } diff --git a/bench/blast/math/dense/DynamicGemm.cpp b/bench/blast/math/dense/DynamicGemm.cpp index 14229286..d505ad55 100644 --- a/bench/blast/math/dense/DynamicGemm.cpp +++ b/bench/blast/math/dense/DynamicGemm.cpp @@ -4,14 +4,12 @@ #include +#include + #include -#include #include -#include -#include - namespace blast :: benchmark { @@ -26,21 +24,24 @@ namespace blast :: benchmark DynamicMatrix B(N, K); DynamicMatrix C(M, N); DynamicMatrix D(M, N); + Real alpha, beta; randomize(A); randomize(B); randomize(C); + randomize(alpha); + randomize(beta); for (auto _ : state) { - gemm(1., A, trans(B), 1., C, D); + gemm(alpha, A, B, beta, C, D); DoNotOptimize(A); DoNotOptimize(B); DoNotOptimize(C); DoNotOptimize(D); } - state.counters["flops"] = Counter(2 * M * N * K, Counter::kIsIterationInvariantRate); + setCounters(state.counters, complexityGemm(M, N, K)); state.counters["m"] = M; } diff --git a/bench/blast/math/dense/StaticGemm.cpp b/bench/blast/math/dense/StaticGemm.cpp index 0534185d..7b133008 100644 --- a/bench/blast/math/dense/StaticGemm.cpp +++ b/bench/blast/math/dense/StaticGemm.cpp @@ -4,9 +4,10 @@ #include +#include + #include -#include #include @@ -39,7 +40,7 @@ namespace blast :: benchmark DoNotOptimize(D); } - state.counters["flops"] = Counter(2 * M * N * K + 3 * M * N, Counter::kIsIterationInvariantRate); + setCounters(state.counters, complexityGemm(M, N, K)); state.counters["m"] = M; } diff --git a/bench/blast/math/panel/DynamicGemm.cpp b/bench/blast/math/panel/DynamicGemm.cpp index 5143c08f..96ac4d62 100644 --- a/bench/blast/math/panel/DynamicGemm.cpp +++ b/bench/blast/math/panel/DynamicGemm.cpp @@ -2,11 +2,11 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. -#include "blast/math/StorageOrder.hpp" #include #include #include + #include @@ -40,7 +40,7 @@ namespace blast :: benchmark DoNotOptimize(D); } - state.counters["flops"] = Counter(2 * M * N * K, Counter::kIsIterationInvariantRate); + setCounters(state.counters, complexityGemm(M, N, K)); state.counters["m"] = M; } diff --git a/bench/blast/math/panel/StaticGemm.cpp b/bench/blast/math/panel/StaticGemm.cpp index cd3b90e1..0d4f4ee2 100644 --- a/bench/blast/math/panel/StaticGemm.cpp +++ b/bench/blast/math/panel/StaticGemm.cpp @@ -2,12 +2,11 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. -#include "blast/math/StorageOrder.hpp" #include #include #include -#include + #include @@ -40,7 +39,7 @@ namespace blast :: benchmark DoNotOptimize(D); } - state.counters["flops"] = Counter(2 * M * N * K, Counter::kIsIterationInvariantRate); + setCounters(state.counters, complexityGemm(M, N, K)); state.counters["m"] = M; } diff --git a/bench/blaze/Gemm.cpp b/bench/blaze/Gemm.cpp index 8ca21883..eb4681e7 100644 --- a/bench/blaze/Gemm.cpp +++ b/bench/blaze/Gemm.cpp @@ -34,7 +34,7 @@ namespace blast :: benchmark DoNotOptimize(C); } - state.counters["flops"] = Counter(2 * M * N * K, Counter::kIsIterationInvariantRate); + setCounters(state.counters, complexityGemm(M, N, K)); state.counters["m"] = M; state.counters["n"] = N; state.counters["k"] = K; @@ -63,7 +63,7 @@ namespace blast :: benchmark DoNotOptimize(C); } - state.counters["flops"] = Counter(2 * m * m * m, Counter::kIsIterationInvariantRate); + setCounters(state.counters, complexityGemm(m, m, m)); state.counters["m"] = m; } @@ -90,7 +90,7 @@ namespace blast :: benchmark } } - state.counters["flops"] = Counter(2 * m * m * m, Counter::kIsIterationInvariantRate); + setCounters(state.counters, complexityGemm(m, m, m)); state.counters["m"] = m; } @@ -123,7 +123,7 @@ namespace blast :: benchmark } } - state.counters["flops"] = Counter(2 * m * m * m, Counter::kIsIterationInvariantRate); + setCounters(state.counters, complexityGemm(m, m, m)); state.counters["m"] = m; } diff --git a/bench/eigen/Gemm.cpp b/bench/eigen/Gemm.cpp index 6c221bca..f1e26c26 100644 --- a/bench/eigen/Gemm.cpp +++ b/bench/eigen/Gemm.cpp @@ -12,8 +12,6 @@ #include -#include - namespace blast :: benchmark { @@ -43,7 +41,7 @@ namespace blast :: benchmark ::benchmark::DoNotOptimize(C); } - state.counters["flops"] = Counter(2 * M * N * K, Counter::kIsIterationInvariantRate); + setCounters(state.counters, complexityGemm(M, N, K)); state.counters["m"] = M; state.counters["n"] = N; state.counters["k"] = K; @@ -72,12 +70,12 @@ namespace blast :: benchmark ::benchmark::DoNotOptimize(C); } - state.counters["flops"] = Counter(2 * m * m * m, Counter::kIsIterationInvariantRate); + setCounters(state.counters, complexityGemm(m, m, m)); state.counters["m"] = m; } - BENCHMARK_TEMPLATE(BM_gemm_dynamic, double)->DenseRange(1, 50); + BENCHMARK_TEMPLATE(BM_gemm_dynamic, double)->DenseRange(1, BENCHMARK_MAX_GEMM); #define BOOST_PP_LOCAL_LIMITS (1, BENCHMARK_MAX_GEMM) diff --git a/bench/libxsmm/Gemm.cpp b/bench/libxsmm/Gemm.cpp index 12f0b411..c6faefb7 100644 --- a/bench/libxsmm/Gemm.cpp +++ b/bench/libxsmm/Gemm.cpp @@ -2,7 +2,8 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. -#include +#include + #include #include @@ -39,7 +40,7 @@ namespace blast :: benchmark for (auto _ : state) kernel(a.data(), b.data(), c.data()); - state.counters["flops"] = Counter(m * n * k, Counter::kIsIterationInvariantRate); + setCounters(state.counters, complexityGemm(m, n, k)); state.counters["m"] = m; } @@ -69,14 +70,14 @@ namespace blast :: benchmark for (auto _ : state) kernel(a.data(), b.data(), c.data()); - state.counters["flops"] = Counter(2 * m * n * k, Counter::kIsIterationInvariantRate); + setCounters(state.counters, complexityGemm(m, n, k)); state.counters["m"] = m; } - BENCHMARK_TEMPLATE(BM_gemm_nn, double)->DenseRange(1, 50); - BENCHMARK_TEMPLATE(BM_gemm_nt, double)->DenseRange(1, 50); + BENCHMARK_TEMPLATE(BM_gemm_nn, double)->DenseRange(1, BENCHMARK_MAX_GEMM); + BENCHMARK_TEMPLATE(BM_gemm_nt, double)->DenseRange(1, BENCHMARK_MAX_GEMM); - BENCHMARK_TEMPLATE(BM_gemm_nn, float)->DenseRange(1, 50); - BENCHMARK_TEMPLATE(BM_gemm_nt, float)->DenseRange(1, 50); + BENCHMARK_TEMPLATE(BM_gemm_nn, float)->DenseRange(1, BENCHMARK_MAX_GEMM); + BENCHMARK_TEMPLATE(BM_gemm_nt, float)->DenseRange(1, BENCHMARK_MAX_GEMM); } diff --git a/include/bench/Gemm.hpp b/include/bench/Gemm.hpp index 63535b35..d9f7088e 100644 --- a/include/bench/Gemm.hpp +++ b/include/bench/Gemm.hpp @@ -5,5 +5,19 @@ #pragma once #include +#include #define BENCHMARK_MAX_GEMM 50 + + +namespace blast :: benchmark +{ + /// @brief Algorithmic complexity of gemm + inline Complexity complexityGemm(std::size_t m, std::size_t n, std::size_t k) + { + return { + {"add", (m * n) * (k + 2)}, + {"mul", (m * n) * (k + 1)}, + }; + } +}