From 6a00256935a1800aeaa9dc7e094841f5649c786d Mon Sep 17 00:00:00 2001 From: Mikhail Katliar Date: Wed, 6 Nov 2024 08:29:55 +0100 Subject: [PATCH 1/5] No more references to Blaze in RegisterMatrixTest.cpp --- test/blast/math/simd/RegisterMatrixTest.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/blast/math/simd/RegisterMatrixTest.cpp b/test/blast/math/simd/RegisterMatrixTest.cpp index 4b67081..eceab1e 100644 --- a/test/blast/math/simd/RegisterMatrixTest.cpp +++ b/test/blast/math/simd/RegisterMatrixTest.cpp @@ -373,7 +373,7 @@ namespace blast :: testing randomize(b); randomize(C); ET alpha {}; - blaze::randomize(alpha); + randomize(alpha); TypeParam ker; ker.load(1., ptr(C)); From d7561735607b556baa2e8be07c90ec64482e56ae Mon Sep 17 00:00:00 2001 From: Mikhail Katliar Date: Wed, 6 Nov 2024 08:49:51 +0100 Subject: [PATCH 2/5] No mentions of Blaze in RegisterMatrix.hpp --- .../math/register_matrix/RegisterMatrix.hpp | 66 +++++++++++-------- 1 file changed, 40 insertions(+), 26 deletions(-) diff --git a/include/blast/math/register_matrix/RegisterMatrix.hpp b/include/blast/math/register_matrix/RegisterMatrix.hpp index b21de1c..c2113ee 100644 --- a/include/blast/math/register_matrix/RegisterMatrix.hpp +++ b/include/blast/math/register_matrix/RegisterMatrix.hpp @@ -12,14 +12,7 @@ #include #include #include - -#include -#include -#include -#include -#include -#include -#include +#include #include #include @@ -41,17 +34,15 @@ namespace blast /// template class RegisterMatrix - : public blaze::Matrix, SO> { public: static_assert(SO == columnMajor, "Only column-major register matrices are currently supported"); - using BaseType = blaze::Matrix, SO>; - using BaseType::storageOrder; + // TODO: change bool to StorageOrder + static constexpr bool storageOrder = SO; /// @brief Type of matrix elements using ElementType = T; - using CompositeType = RegisterMatrix const&; //!< Data type for composite expression templates. /// @brief Default ctor @@ -84,6 +75,9 @@ namespace blast /// @brief Number of matrix panels + /// + /// TODO: do we need it? deprecate? + /// static size_t constexpr panels() { return RM; @@ -98,6 +92,9 @@ namespace blast /// @brief SIMD size + /// + /// TODO: do we need it? deprecate? + /// static size_t constexpr simdSize() { return SS; @@ -114,7 +111,9 @@ namespace blast /// @brief Set all elements to 0. void reset() noexcept { + #pragma unroll for (size_t i = 0; i < RM; ++i) + #pragma unroll for (size_t j = 0; j < N; ++j) v_[i][j].reset(); } @@ -399,10 +398,10 @@ namespace blast static size_t constexpr RM = M / SS; static size_t constexpr RN = N; - BLAZE_STATIC_ASSERT_MSG((RM > 0), "Number of rows must be not less than SIMD size"); - BLAZE_STATIC_ASSERT_MSG((RN > 0), "Number of columns must be positive"); - BLAZE_STATIC_ASSERT_MSG((M % SS == 0), "Number of rows must be a multiple of SIMD size"); - BLAZE_STATIC_ASSERT_MSG((RM * RN <= registerCapacity(Arch {})), "Not enough registers for a RegisterMatrix"); + static_assert(RM > 0, "Number of rows must be not less than SIMD size"); + static_assert(RN > 0, "Number of columns must be positive"); + static_assert(M % SS == 0, "Number of rows must be a multiple of SIMD size"); + static_assert(RM * RN <= registerCapacity(Arch {}), "Not enough registers for a RegisterMatrix"); SimdVecType v_[RM][RN]; @@ -434,6 +433,7 @@ namespace blast struct StorageOrderHelper> : std::integral_constant {}; + // TODO: deprecate template struct RegisterMatrixTraits; @@ -451,6 +451,20 @@ namespace blast }; + template + inline size_t constexpr rows(RegisterMatrix const& m) noexcept + { + return m.rows(); + } + + + template + inline size_t constexpr columns(RegisterMatrix const& m) noexcept + { + return m.columns(); + } + + template template requires MatrixPointer && (P::storageOrder == columnMajor) @@ -588,7 +602,7 @@ namespace blast template template requires MatrixPointer - BLAZE_ALWAYS_INLINE void RegisterMatrix::trsm(Side side, UpLo uplo, P A) noexcept + BLAST_ALWAYS_INLINE void RegisterMatrix::trsm(Side side, UpLo uplo, P A) noexcept { if constexpr (SO == columnMajor) { @@ -632,7 +646,7 @@ namespace blast requires VectorPointer && (PA::transposeFlag == columnVector) && VectorPointer && (PB::transposeFlag == rowVector) - BLAZE_ALWAYS_INLINE void RegisterMatrix::ger(T alpha, PA a, PB b) noexcept + BLAST_ALWAYS_INLINE void RegisterMatrix::ger(T alpha, PA a, PB b) noexcept { BLAZE_STATIC_ASSERT_MSG((RM * RN + RM + 1 <= registerCapacity(Arch {})), "Not enough registers for ger()"); @@ -659,7 +673,7 @@ namespace blast requires VectorPointer && (PA::transposeFlag == columnVector) && VectorPointer && (PB::transposeFlag == rowVector) - BLAZE_ALWAYS_INLINE void RegisterMatrix::ger(PA a, PB b) noexcept + BLAST_ALWAYS_INLINE void RegisterMatrix::ger(PA a, PB b) noexcept { BLAZE_STATIC_ASSERT_MSG((RM * RN + RM + 1 <= registerCapacity(Arch {})), "Not enough registers for ger()"); @@ -686,7 +700,7 @@ namespace blast requires VectorPointer && (PA::transposeFlag == columnVector) && VectorPointer && (PB::transposeFlag == rowVector) - BLAZE_ALWAYS_INLINE void RegisterMatrix::ger(T alpha, PA a, PB b, size_t m, size_t n) noexcept + BLAST_ALWAYS_INLINE void RegisterMatrix::ger(T alpha, PA a, PB b, size_t m, size_t n) noexcept { SimdVecType ax[RM]; @@ -711,7 +725,7 @@ namespace blast requires VectorPointer && (PA::transposeFlag == columnVector) && VectorPointer && (PB::transposeFlag == rowVector) - BLAZE_ALWAYS_INLINE void RegisterMatrix::ger(PA a, PB b, size_t m, size_t n) noexcept + BLAST_ALWAYS_INLINE void RegisterMatrix::ger(PA a, PB b, size_t m, size_t n) noexcept { SimdVecType ax[RM]; @@ -732,7 +746,7 @@ namespace blast template - BLAZE_ALWAYS_INLINE void RegisterMatrix::potrf() noexcept + BLAST_ALWAYS_INLINE void RegisterMatrix::potrf() noexcept { static_assert(M >= N, "potrf() not implemented for register matrices with columns more than rows"); static_assert(RM * RN + 2 <= registerCapacity(Arch {}), "Not enough registers"); @@ -767,7 +781,7 @@ namespace blast template template requires MatrixPointer && (P1::storageOrder == columnMajor) && MatrixPointer - BLAZE_ALWAYS_INLINE void RegisterMatrix::trmm(T alpha, P1 a, UpLo uplo, bool diagonal_unit, P2 b) noexcept + BLAST_ALWAYS_INLINE void RegisterMatrix::trmm(T alpha, P1 a, UpLo uplo, bool diagonal_unit, P2 b) noexcept { if (diagonal_unit) BLAST_THROW_EXCEPTION(std::logic_error {"Unit diagonal matrices support not implemented in RegisterMatrix::trmm()"}); @@ -817,7 +831,7 @@ namespace blast template template requires MatrixPointer && (PB::storageOrder == columnMajor) && MatrixPointer - BLAZE_ALWAYS_INLINE void RegisterMatrix::trmm(T alpha, PB b, PA a, UpLo uplo, bool diagonal_unit) noexcept + BLAST_ALWAYS_INLINE void RegisterMatrix::trmm(T alpha, PB b, PA a, UpLo uplo, bool diagonal_unit) noexcept { if (diagonal_unit) BLAST_THROW_EXCEPTION(std::logic_error {"Unit diagonal matrices support not implemented in RegisterMatrix::trmm()"}); @@ -866,9 +880,9 @@ namespace blast template template requires MatrixPointer && (PB::storageOrder == columnMajor) && MatrixPointer - BLAZE_ALWAYS_INLINE void RegisterMatrix::trmm(T alpha, PB b, PA a, UpLo uplo, bool diagonal_unit, size_t m, size_t n) noexcept + BLAST_ALWAYS_INLINE void RegisterMatrix::trmm(T alpha, PB b, PA a, UpLo uplo, bool diagonal_unit, size_t m, size_t n) noexcept { - // NOTE: this implementation does uses unmasked loads from the matrix a, + // NOTE: this implementation uses unmasked loads from the matrix a, // and therefore will access rows of a beyond m-1. // This will result in undefined behavior on unpadded matrices. auto au = ~a; From 06ae6a428070baeac2dcc423f0fc7567b480dde0 Mon Sep 17 00:00:00 2001 From: Mikhail Katliar Date: Wed, 6 Nov 2024 09:06:02 +0100 Subject: [PATCH 3/5] Removed deprecated stuff from RegisterMatrix --- .../math/register_matrix/RegisterMatrix.hpp | 38 ------ test/blast/math/simd/RegisterMatrixTest.cpp | 126 ++++++++---------- 2 files changed, 54 insertions(+), 110 deletions(-) diff --git a/include/blast/math/register_matrix/RegisterMatrix.hpp b/include/blast/math/register_matrix/RegisterMatrix.hpp index c2113ee..052e4db 100644 --- a/include/blast/math/register_matrix/RegisterMatrix.hpp +++ b/include/blast/math/register_matrix/RegisterMatrix.hpp @@ -74,16 +74,6 @@ namespace blast } - /// @brief Number of matrix panels - /// - /// TODO: do we need it? deprecate? - /// - static size_t constexpr panels() - { - return RM; - } - - /// @brief Number of registers used static size_t constexpr registers() { @@ -91,16 +81,6 @@ namespace blast } - /// @brief SIMD size - /// - /// TODO: do we need it? deprecate? - /// - static size_t constexpr simdSize() - { - return SS; - } - - /// @brief Value of the matrix element at row \a i and column \a j T operator()(size_t i, size_t j) const noexcept { @@ -433,24 +413,6 @@ namespace blast struct StorageOrderHelper> : std::integral_constant {}; - // TODO: deprecate - template - struct RegisterMatrixTraits; - - - // TODO: deprecate - template - struct RegisterMatrixTraits> - { - static size_t constexpr simdSize = RegisterMatrix::SS; - static size_t constexpr rows = M; - static size_t constexpr columns = N; - static size_t constexpr elementCount = rows * columns; - - using ElementType = T; - }; - - template inline size_t constexpr rows(RegisterMatrix const& m) noexcept { diff --git a/test/blast/math/simd/RegisterMatrixTest.cpp b/test/blast/math/simd/RegisterMatrixTest.cpp index eceab1e..187f5f4 100644 --- a/test/blast/math/simd/RegisterMatrixTest.cpp +++ b/test/blast/math/simd/RegisterMatrixTest.cpp @@ -52,7 +52,6 @@ namespace blast :: testing TYPED_TEST(RegisterMatrixTest, testDefaultCtor) { using RM = TypeParam; - using Traits = RegisterMatrixTraits; using ET = ElementType_t; RM ker; @@ -66,10 +65,9 @@ namespace blast :: testing TYPED_TEST(RegisterMatrixTest, testReset) { using RM = TypeParam; - using Traits = RegisterMatrixTraits; using ET = ElementType_t; - StaticPanelMatrix A; + StaticPanelMatrix A; randomize(A); RM ker; @@ -90,18 +88,17 @@ namespace blast :: testing TYPED_TEST(RegisterMatrixTest, testLoadPanel) { using RM = TypeParam; - using Traits = RegisterMatrixTraits; using ET = ElementType_t; - StaticPanelMatrix A; + StaticPanelMatrix A; randomize(A); RM ker; ET const beta = 0.1; ker.load(beta, ptr(A, 0, 0)); - for (size_t i = 0; i < Traits::rows; ++i) - for (size_t j = 0; j < Traits::columns; ++j) + for (size_t i = 0; i < RM::rows(); ++i) + for (size_t j = 0; j < RM::columns(); ++j) EXPECT_EQ(ker(i, j), beta * A(i, j)) << "element mismatch at (" << i << ", " << j << ")"; } @@ -109,10 +106,9 @@ namespace blast :: testing TYPED_TEST(RegisterMatrixTest, testPartialLoadPanel) { using RM = TypeParam; - using Traits = RegisterMatrixTraits; using ET = ElementType_t; - StaticPanelMatrix A; + StaticPanelMatrix A; randomize(A); for (size_t m = 0; m <= rows(A); ++m) @@ -136,10 +132,9 @@ namespace blast :: testing TYPED_TEST(RegisterMatrixTest, testPartialLoadDense) { using RM = TypeParam; - using Traits = RegisterMatrixTraits; using ET = ElementType_t; - StaticMatrix A; + StaticMatrix A; for (size_t i = 0; i < rows(A); ++i) for (size_t j = 0; j < columns(A); ++j) A(i, j) = 1000 * i + j; @@ -167,18 +162,17 @@ namespace blast :: testing TYPED_TEST(RegisterMatrixTest, testLoadStore) { using RM = TypeParam; - using Traits = RegisterMatrixTraits; using ET = ElementType_t; - StaticPanelMatrix A, B; + StaticPanelMatrix A, B; randomize(A); RM ker; ker.load(ptr(A)); ker.store(ptr(B)); - for (size_t i = 0; i < Traits::rows; ++i) - for (size_t j = 0; j < Traits::columns; ++j) + for (size_t i = 0; i < RM::rows(); ++i) + for (size_t j = 0; j < RM::columns(); ++j) EXPECT_EQ(B(i, j), A(i, j)) << "element mismatch at (" << i << ", " << j << ")"; } @@ -186,18 +180,17 @@ namespace blast :: testing TYPED_TEST(RegisterMatrixTest, testLoadStore2) { using RM = TypeParam; - using Traits = RegisterMatrixTraits; using ET = ElementType_t; - StaticMatrix A, B(0.); + StaticMatrix A, B(0.); randomize(A); RM ker; ker.load(1., ptr(A, 0, 0)); ker.store(ptr(B, 0, 0)); - for (size_t i = 0; i < Traits::rows; ++i) - for (size_t j = 0; j < Traits::columns; ++j) + for (size_t i = 0; i < RM::rows(); ++i) + for (size_t j = 0; j < RM::columns(); ++j) EXPECT_EQ(B(i, j), A(i, j)) << "element mismatch at (" << i << ", " << j << ")"; } @@ -216,10 +209,6 @@ namespace blast :: testing // store2(ker, B.data(), B.spacing()); EXPECT_EQ(ker, A); - - // for (size_t i = 0; i < ker.rows(); ++i) - // for (size_t j = 0; j < ker.columns(); ++j) - // EXPECT_EQ(ker(i, j), A(i, j)) << "element mismatch at (" << i << ", " << j << ")"; } @@ -280,28 +269,27 @@ namespace blast :: testing TYPED_TEST(RegisterMatrixTest, testPartialStore) { using RM = TypeParam; - using Traits = RegisterMatrixTraits; using ET = ElementType_t; - StaticMatrix A_ref; + StaticMatrix A_ref; randomize(A_ref); - StaticPanelMatrix A, B; + StaticPanelMatrix A, B; assign(A, A_ref); RM ker; ker.load(ptr(A)); - for (size_t m = ker.rows() + 1 - ker.simdSize(); m <= Traits::rows; ++m) - for (size_t n = 1; n <= Traits::columns; ++n) + for (size_t m = ker.rows() + 1 - SimdSize_v; m <= RM::rows(); ++m) + for (size_t n = 1; n <= RM::columns(); ++n) { - if (m != Traits::rows && n != Traits::columns) + if (m != RM::rows() && n != RM::columns()) { B = 0.; ker.store(ptr(B), m, n); - for (size_t i = 0; i < Traits::rows; ++i) - for (size_t j = 0; j < Traits::columns; ++j) + for (size_t i = 0; i < RM::rows(); ++i) + for (size_t j = 0; j < RM::columns(); ++j) ASSERT_EQ(B(i, j), i < m && j < n ? A_ref(i, j) : 0.) << "element mismatch at (" << i << ", " << j << "), " << "store size = " << m << "x" << n; } @@ -312,23 +300,22 @@ namespace blast :: testing TYPED_TEST(RegisterMatrixTest, testPartialStore2) { using RM = TypeParam; - using Traits = RegisterMatrixTraits; using ET = ElementType_t; - StaticMatrix A, B; + StaticMatrix A, B; randomize(A); RM ker; ker.load(1., ptr(A, 0, 0)); - for (size_t m = 0; m <= Traits::rows; ++m) - for (size_t n = 0; n <= Traits::columns; ++n) + for (size_t m = 0; m <= RM::rows(); ++m) + for (size_t n = 0; n <= RM::columns(); ++n) { B = 0.; ker.store(ptr(B, 0, 0), m, n); - for (size_t i = 0; i < Traits::rows; ++i) - for (size_t j = 0; j < Traits::columns; ++j) + for (size_t i = 0; i < RM::rows(); ++i) + for (size_t j = 0; j < RM::columns(); ++j) ASSERT_EQ(B(i, j), i < m && j < n ? A(i, j) : 0.) << "element mismatch at (" << i << ", " << j << "), " << "store size = " << m << "x" << n; } @@ -338,12 +325,11 @@ namespace blast :: testing TYPED_TEST(RegisterMatrixTest, testGerNT) { using RM = TypeParam; - using Traits = RegisterMatrixTraits; using ET = ElementType_t; - StaticPanelMatrix A; - StaticPanelMatrix B; - StaticPanelMatrix C; + StaticPanelMatrix A; + StaticPanelMatrix B; + StaticPanelMatrix C; randomize(A); randomize(B); @@ -362,12 +348,11 @@ namespace blast :: testing TYPED_TEST(RegisterMatrixTest, testGer) { using RM = TypeParam; - using Traits = RegisterMatrixTraits; using ET = ElementType_t; - DynamicVector a(Traits::rows); - DynamicVector b(Traits::columns); - StaticMatrix C; + DynamicVector a(RM::rows()); + DynamicVector b(RM::columns()); + StaticMatrix C; randomize(a); randomize(b); @@ -388,19 +373,18 @@ namespace blast :: testing TYPED_TEST(RegisterMatrixTest, testPartialGerNT) { using RM = TypeParam; - using Traits = RegisterMatrixTraits; using ET = ElementType_t; - StaticPanelMatrix A; - StaticPanelMatrix B; - StaticPanelMatrix C; + StaticPanelMatrix A; + StaticPanelMatrix B; + StaticPanelMatrix C; randomize(A); randomize(B); randomize(C); - StaticMatrix D; - reference::ger(Traits::rows, Traits::columns, 1., column(ptr(A)), column(ptr(B)).trans(), ptr(C), ptr(D)); + StaticMatrix D; + reference::ger(RM::rows(), RM::columns(), 1., column(ptr(A)), column(ptr(B)).trans(), ptr(C), ptr(D)); for (size_t m = 0; m <= rows(C); ++m) { @@ -423,19 +407,18 @@ namespace blast :: testing TYPED_TEST(RegisterMatrixTest, testPartialGerNT2) { using RM = TypeParam; - using Traits = RegisterMatrixTraits; using ET = ElementType_t; - DynamicVector a(Traits::rows); - DynamicVector b(Traits::columns); - StaticMatrix C; + DynamicVector a(RM::rows()); + DynamicVector b(RM::columns()); + StaticMatrix C; randomize(a); randomize(b); randomize(C); - StaticMatrix D; - reference::ger(Traits::rows, Traits::columns, 1., ptr(a), ptr(trans(b)), ptr(C), ptr(D)); + StaticMatrix D; + reference::ger(RM::rows(), RM::columns(), 1., ptr(a), ptr(trans(b)), ptr(C), ptr(D)); for (size_t m = 0; m <= rows(C); ++m) { @@ -458,12 +441,11 @@ namespace blast :: testing TYPED_TEST(RegisterMatrixTest, testGerNT2) { using RM = TypeParam; - using Traits = RegisterMatrixTraits; using ET = ElementType_t; - DynamicVector a(Traits::rows); - DynamicVector b(Traits::columns); - StaticMatrix C, D; + DynamicVector a(RM::rows()); + DynamicVector b(RM::columns()); + StaticMatrix C, D; randomize(a); randomize(b); @@ -474,18 +456,19 @@ namespace blast :: testing ker.ger(ET(1.), ptr(a), ptr(trans(b))); ker.store(ptr(D)); - StaticMatrix D_ref; - reference::ger(Traits::rows, Traits::columns, 1., ptr(a), ptr(trans(b)), ptr(C), ptr(D_ref)); + StaticMatrix D_ref; + reference::ger(RM::rows(), RM::columns(), 1., ptr(a), ptr(trans(b)), ptr(C), ptr(D_ref)); BLAST_EXPECT_APPROX_EQ(D, D_ref, absTol(), relTol()); } TYPED_TEST(RegisterMatrixTest, testPotrf) { - using Traits = RegisterMatrixTraits; - using ET = typename Traits::ElementType; - static size_t constexpr m = Traits::rows; - static size_t constexpr n = Traits::columns; + using RM = TypeParam; + using ET = ElementType_t; + + static size_t constexpr m = RM::rows(); + static size_t constexpr n = RM::columns(); if constexpr (m >= n) { @@ -513,17 +496,16 @@ namespace blast :: testing TYPED_TEST(RegisterMatrixTest, testTrsmRightLowerTransposePanel) { using RM = TypeParam; - using Traits = RegisterMatrixTraits; using ET = ElementType_t; RM ker; - StaticPanelMatrix A; - StaticPanelMatrix B, X; + StaticPanelMatrix A; + StaticPanelMatrix B, X; randomize(A); - for (size_t i = 0; i < Traits::columns; ++i) - A(i, i) += Traits::columns; // Improve conditioning + for (size_t i = 0; i < RM::columns(); ++i) + A(i, i) += RM::columns(); // Improve conditioning randomize(B); From bed73923577a51227dd60d2256c100c716e5dc36 Mon Sep 17 00:00:00 2001 From: Mikhail Katliar Date: Wed, 6 Nov 2024 09:34:15 +0100 Subject: [PATCH 4/5] Changed type of SO template parameter of RegisterMatrix from bool to StorageOrder and fixed benchmarks --- bench/blast/math/simd/Ger.cpp | 9 ++-- bench/blast/math/simd/Load.cpp | 23 +++----- bench/blast/math/simd/PartialGemm.cpp | 11 ++-- bench/blast/math/simd/PartialLoad.cpp | 5 +- bench/blast/math/simd/PartialStore.cpp | 11 ++-- bench/blast/math/simd/Potrf.cpp | 15 ++---- bench/blast/math/simd/Store.cpp | 23 +++----- bench/blast/math/simd/Trmm.cpp | 21 ++++---- bench/blast/math/simd/Trsm.cpp | 11 ++-- include/blast/math/register_matrix/Gemm.hpp | 12 ++--- .../math/register_matrix/RegisterMatrix.hpp | 53 +++++++++---------- 11 files changed, 78 insertions(+), 116 deletions(-) diff --git a/bench/blast/math/simd/Ger.cpp b/bench/blast/math/simd/Ger.cpp index c099c8e..bbd8139 100644 --- a/bench/blast/math/simd/Ger.cpp +++ b/bench/blast/math/simd/Ger.cpp @@ -12,17 +12,16 @@ namespace blast :: benchmark { - template + template static void BM_RegisterMatrix_ger_nt(State& state) { using Kernel = RegisterMatrix; - using Traits = RegisterMatrixTraits; using ET = ElementType_t; size_t constexpr K = 100; - DynamicPanelMatrix a(Traits::rows, K); - DynamicPanelMatrix b(Traits::columns, K); - DynamicPanelMatrix c(Traits::rows, Traits::columns); + DynamicPanelMatrix a(Kernel::rows(), K); + DynamicPanelMatrix b(Kernel::columns(), K); + DynamicPanelMatrix c(Kernel::rows(), Kernel::columns()); randomize(a); randomize(b); diff --git a/bench/blast/math/simd/Load.cpp b/bench/blast/math/simd/Load.cpp index c5ededc..d043fd8 100644 --- a/bench/blast/math/simd/Load.cpp +++ b/bench/blast/math/simd/Load.cpp @@ -11,18 +11,13 @@ #include -#include - namespace blast :: benchmark { - template + template static void BM_RegisterMatrix_load_dynamic_panel(State& state) { - using Kernel = RegisterMatrix; - using Traits = RegisterMatrixTraits; - - Kernel ker; + RegisterMatrix ker; DynamicPanelMatrix c(ker.rows(), ker.columns()); randomize(c); @@ -37,13 +32,10 @@ namespace blast :: benchmark } - template + template static void BM_RegisterMatrix_load_dynamic_dense(State& state) { - using Kernel = RegisterMatrix; - using Traits = RegisterMatrixTraits; - - Kernel ker; + RegisterMatrix ker; DynamicMatrix c(ker.rows(), ker.columns()); randomize(c); @@ -58,13 +50,10 @@ namespace blast :: benchmark } - template + template static void BM_RegisterMatrix_load_static_dense(State& state) { - using Kernel = RegisterMatrix; - using Traits = RegisterMatrixTraits; - - Kernel ker; + RegisterMatrix ker; StaticMatrix c; randomize(c); diff --git a/bench/blast/math/simd/PartialGemm.cpp b/bench/blast/math/simd/PartialGemm.cpp index 84951e6..962c21d 100644 --- a/bench/blast/math/simd/PartialGemm.cpp +++ b/bench/blast/math/simd/PartialGemm.cpp @@ -5,24 +5,23 @@ #include #include #include +#include #include #include -#include - namespace blast :: benchmark { - template + template static void BM_RegisterMatrix_partialGemm_static(State& state) { size_t constexpr K = 5; - blaze::StaticMatrix A; - blaze::StaticMatrix B; - blaze::StaticMatrix C, D; + StaticMatrix A; + StaticMatrix B; + StaticMatrix C, D; randomize(A); randomize(B); diff --git a/bench/blast/math/simd/PartialLoad.cpp b/bench/blast/math/simd/PartialLoad.cpp index 35bd52e..4aa4aee 100644 --- a/bench/blast/math/simd/PartialLoad.cpp +++ b/bench/blast/math/simd/PartialLoad.cpp @@ -5,19 +5,18 @@ #include #include #include +#include #include #include -#include - #include namespace blast :: benchmark { - template + template static void BM_RegisterMatrix_partialLoad_static_dense(State& state) { using Kernel = RegisterMatrix; diff --git a/bench/blast/math/simd/PartialStore.cpp b/bench/blast/math/simd/PartialStore.cpp index 10f4214..a0bf0f4 100644 --- a/bench/blast/math/simd/PartialStore.cpp +++ b/bench/blast/math/simd/PartialStore.cpp @@ -1,23 +1,22 @@ -// Copyright (c) 2019-2020 Mikhail Katliar All rights reserved. +// Copyright (c) 2019-2024 Mikhail Katliar All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. #include #include #include +#include #include #include -#include - #include namespace blast :: benchmark { - template + template static void BM_RegisterMatrix_partialStore_panel(State& state) { using Kernel = RegisterMatrix; @@ -44,7 +43,7 @@ namespace blast :: benchmark } - template + template static void BM_RegisterMatrix_partialStore(State& state) { using Kernel = RegisterMatrix; @@ -71,7 +70,7 @@ namespace blast :: benchmark } - template + template static void BM_RegisterMatrix_partialStore_static(State& state) { RegisterMatrix ker; diff --git a/bench/blast/math/simd/Potrf.cpp b/bench/blast/math/simd/Potrf.cpp index 0f44ab5..7681b0d 100644 --- a/bench/blast/math/simd/Potrf.cpp +++ b/bench/blast/math/simd/Potrf.cpp @@ -14,18 +14,13 @@ namespace blast :: benchmark { - template + template static void BM_RegisterMatrix_potrf(State& state) { - using Kernel = RegisterMatrix; - using Traits = RegisterMatrixTraits; - size_t constexpr m = Traits::rows; - size_t constexpr n = Traits::columns; - - DynamicPanelMatrix a(m, n); + DynamicPanelMatrix a(M, N); randomize(a); - Kernel ker; + RegisterMatrix ker; ker.load(ptr(a)); for (auto _ : state) @@ -34,8 +29,8 @@ namespace blast :: benchmark DoNotOptimize(ker); } - if (m >= n) - setCounters(state.counters, complexityPotrf(m, n)); + if (M >= N) + setCounters(state.counters, complexityPotrf(M, N)); } diff --git a/bench/blast/math/simd/Store.cpp b/bench/blast/math/simd/Store.cpp index 398a5aa..bc98d6c 100644 --- a/bench/blast/math/simd/Store.cpp +++ b/bench/blast/math/simd/Store.cpp @@ -11,18 +11,13 @@ #include -#include - namespace blast :: benchmark { - template + template static void BM_RegisterMatrix_store_dynamic_panel(State& state) { - using Kernel = RegisterMatrix; - using Traits = RegisterMatrixTraits; - - Kernel ker; + RegisterMatrix ker; DynamicPanelMatrix c(ker.rows(), ker.columns()), d(ker.rows(), ker.columns()); randomize(c); @@ -39,13 +34,10 @@ namespace blast :: benchmark } - template + template static void BM_RegisterMatrix_store_dynamic_dense(State& state) { - using Kernel = RegisterMatrix; - using Traits = RegisterMatrixTraits; - - Kernel ker; + RegisterMatrix ker; DynamicMatrix c(ker.rows(), ker.columns()), d(ker.rows(), ker.columns()); randomize(c); @@ -62,13 +54,10 @@ namespace blast :: benchmark } - template + template static void BM_RegisterMatrix_store_static_dense(State& state) { - using Kernel = RegisterMatrix; - using Traits = RegisterMatrixTraits; - - Kernel ker; + RegisterMatrix ker; StaticMatrix c, d; randomize(c); diff --git a/bench/blast/math/simd/Trmm.cpp b/bench/blast/math/simd/Trmm.cpp index ac31b8b..ebead33 100644 --- a/bench/blast/math/simd/Trmm.cpp +++ b/bench/blast/math/simd/Trmm.cpp @@ -4,29 +4,27 @@ #include #include +#include #include #include -#include - namespace blast :: benchmark { - template + template static void BM_RegisterMatrix_trmmLeft(State& state) { - using Kernel = RegisterMatrix; size_t constexpr K = 100; - blaze::StaticMatrix A; - blaze::StaticMatrix B; + StaticMatrix A; + StaticMatrix B; randomize(A); randomize(B); - Kernel ker; + RegisterMatrix ker; for (auto _ : state) { @@ -38,19 +36,18 @@ namespace blast :: benchmark } - template + template static void BM_RegisterMatrix_trmmRight(State& state) { - using Kernel = RegisterMatrix; size_t constexpr K = 100; - blaze::StaticMatrix A; - blaze::StaticMatrix B; + StaticMatrix A; + StaticMatrix B; randomize(A); randomize(B); - Kernel ker; + RegisterMatrix ker; for (auto _ : state) { diff --git a/bench/blast/math/simd/Trsm.cpp b/bench/blast/math/simd/Trsm.cpp index 85f050b..c3baa89 100644 --- a/bench/blast/math/simd/Trsm.cpp +++ b/bench/blast/math/simd/Trsm.cpp @@ -13,18 +13,15 @@ namespace blast :: benchmark { - template + template static void BM_RegisterMatrix_trsm(State& state) { using Kernel = RegisterMatrix; - using Traits = RegisterMatrixTraits; - size_t constexpr m = Traits::rows; - size_t constexpr n = Traits::columns; - StaticPanelMatrix L; + StaticPanelMatrix L; randomize(L); - StaticPanelMatrix A; + StaticPanelMatrix A; randomize(A); Kernel ker; @@ -36,7 +33,7 @@ namespace blast :: benchmark DoNotOptimize(ker); } - setCounters(state.counters, complexity(trsmTag, false, m, n)); + setCounters(state.counters, complexity(trsmTag, false, M, N)); } diff --git a/include/blast/math/register_matrix/Gemm.hpp b/include/blast/math/register_matrix/Gemm.hpp index 6c1f3b8..004d69d 100644 --- a/include/blast/math/register_matrix/Gemm.hpp +++ b/include/blast/math/register_matrix/Gemm.hpp @@ -25,7 +25,7 @@ namespace blast /// R += alpha * A * B, /// where R is M by N, A is M by K, and B is K by N. /// - template + template requires MatrixPointer && (PA::storageOrder == columnMajor) && MatrixPointer inline void gemm(RegisterMatrix& r, size_t K, T alpha, PA a, PB b) noexcept @@ -40,7 +40,7 @@ namespace blast /// R(0:md-1, 0:nd-1) += alpha * A * B, /// where R is M by N, A is md by K, and B is K by nd. /// - template + template requires MatrixPointer && (PA::storageOrder == columnMajor) && MatrixPointer inline void gemm(RegisterMatrix& r, size_t K, @@ -62,7 +62,7 @@ namespace blast /// T and SO could be inferred from the argument types. /// template < - typename T, size_t M, size_t N, bool SO, + typename T, size_t M, size_t N, StorageOrder SO, typename PA, typename PB, typename PC, typename PD > requires MatrixPointer && (PA::storageOrder == columnMajor) @@ -103,7 +103,7 @@ namespace blast /// T and SO could be inferred from the argument types. /// template < - typename T, size_t M, size_t N, bool SO, + typename T, size_t M, size_t N, StorageOrder SO, typename PA, typename PB, typename PC, typename PD > requires @@ -135,7 +135,7 @@ namespace blast /// T and SO could be inferred from the argument types. /// template < - typename T, size_t M, size_t N, bool SO, + typename T, size_t M, size_t N, StorageOrder SO, typename PA, typename PB, typename PC, typename PD > requires MatrixPointer && (PA::storageOrder == columnMajor) @@ -164,7 +164,7 @@ namespace blast /// T and SO could be inferred from the argument types. /// template < - typename T, size_t M, size_t N, bool SO, + typename T, size_t M, size_t N, StorageOrder SO, typename PA, typename PB, typename PC, typename PD > requires MatrixPointer && (PA::storageOrder == columnMajor) diff --git a/include/blast/math/register_matrix/RegisterMatrix.hpp b/include/blast/math/register_matrix/RegisterMatrix.hpp index 052e4db..a6a8db6 100644 --- a/include/blast/math/register_matrix/RegisterMatrix.hpp +++ b/include/blast/math/register_matrix/RegisterMatrix.hpp @@ -32,14 +32,13 @@ namespace blast /// @tparam N number of columns of the matrix. /// @tparam SO orientation of SIMD registers. /// - template + template class RegisterMatrix { public: static_assert(SO == columnMajor, "Only column-major register matrices are currently supported"); - // TODO: change bool to StorageOrder - static constexpr bool storageOrder = SO; + static constexpr StorageOrder storageOrder = SO; /// @brief Type of matrix elements using ElementType = T; @@ -409,25 +408,25 @@ namespace blast * @tparam N number of columns of the matrix. * @tparam SO orientation of SIMD registers. */ - template - struct StorageOrderHelper> : std::integral_constant {}; + template + struct StorageOrderHelper> : std::integral_constant {}; - template + template inline size_t constexpr rows(RegisterMatrix const& m) noexcept { return m.rows(); } - template + template inline size_t constexpr columns(RegisterMatrix const& m) noexcept { return m.columns(); } - template + template template requires MatrixPointer && (P::storageOrder == columnMajor) inline void RegisterMatrix::load(P p) noexcept @@ -440,7 +439,7 @@ namespace blast } - template + template template requires MatrixPointer && (P::storageOrder == columnMajor) inline void RegisterMatrix::load(T beta, P p) noexcept @@ -453,7 +452,7 @@ namespace blast } - template + template template requires MatrixPointer && (P::storageOrder == columnMajor) inline void RegisterMatrix::load(T beta, P p, size_t m, size_t n) noexcept @@ -481,7 +480,7 @@ namespace blast } - template + template template requires MatrixPointer && (P::storageOrder == columnMajor) inline void RegisterMatrix::store(P p) const noexcept @@ -494,7 +493,7 @@ namespace blast } - template + template template requires MatrixPointer && (P::storageOrder == columnMajor) inline void RegisterMatrix::store(P p, size_t m, size_t n) const noexcept @@ -516,7 +515,7 @@ namespace blast } - template + template template requires MatrixPointer && (P::storageOrder == columnMajor) inline void RegisterMatrix::storeLower(P p) const noexcept @@ -539,7 +538,7 @@ namespace blast } - template + template template requires MatrixPointer && (P::storageOrder == columnMajor) inline void RegisterMatrix::storeLower(P p, size_t m, size_t n) const noexcept @@ -561,7 +560,7 @@ namespace blast } - template + template template requires MatrixPointer BLAST_ALWAYS_INLINE void RegisterMatrix::trsm(Side side, UpLo uplo, P A) noexcept @@ -603,7 +602,7 @@ namespace blast } - template + template template requires VectorPointer && (PA::transposeFlag == columnVector) && @@ -630,7 +629,7 @@ namespace blast } - template + template template requires VectorPointer && (PA::transposeFlag == columnVector) && @@ -657,7 +656,7 @@ namespace blast } - template + template template requires VectorPointer && (PA::transposeFlag == columnVector) && @@ -682,7 +681,7 @@ namespace blast } - template + template template requires VectorPointer && (PA::transposeFlag == columnVector) && @@ -707,7 +706,7 @@ namespace blast } - template + template BLAST_ALWAYS_INLINE void RegisterMatrix::potrf() noexcept { static_assert(M >= N, "potrf() not implemented for register matrices with columns more than rows"); @@ -740,7 +739,7 @@ namespace blast } - template + template template requires MatrixPointer && (P1::storageOrder == columnMajor) && MatrixPointer BLAST_ALWAYS_INLINE void RegisterMatrix::trmm(T alpha, P1 a, UpLo uplo, bool diagonal_unit, P2 b) noexcept @@ -790,7 +789,7 @@ namespace blast } - template + template template requires MatrixPointer && (PB::storageOrder == columnMajor) && MatrixPointer BLAST_ALWAYS_INLINE void RegisterMatrix::trmm(T alpha, PB b, PA a, UpLo uplo, bool diagonal_unit) noexcept @@ -839,7 +838,7 @@ namespace blast } - template + template template requires MatrixPointer && (PB::storageOrder == columnMajor) && MatrixPointer BLAST_ALWAYS_INLINE void RegisterMatrix::trmm(T alpha, PB b, PA a, UpLo uplo, bool diagonal_unit, size_t m, size_t n) noexcept @@ -881,8 +880,8 @@ namespace blast } - template - inline bool operator==(RegisterMatrix const& rm, MT const& m) + template + inline bool operator==(RegisterMatrix const& rm, MT const& m) { if (rows(m) != rm.rows() || columns(m) != rm.columns()) return false; @@ -896,8 +895,8 @@ namespace blast } - template - inline bool operator==(MT const& m, RegisterMatrix const& rm) + template + inline bool operator==(MT const& m, RegisterMatrix const& rm) { return rm == m; } From b25aa2828211d2e7260ff801ae934545a3a63bd0 Mon Sep 17 00:00:00 2001 From: Mikhail Katliar Date: Wed, 6 Nov 2024 09:58:46 +0100 Subject: [PATCH 5/5] StorageOrder type untied from Blaze --- include/blast/math/StorageOrder.hpp | 24 ++++++------------------ 1 file changed, 6 insertions(+), 18 deletions(-) diff --git a/include/blast/math/StorageOrder.hpp b/include/blast/math/StorageOrder.hpp index c40f816..ae3f5d5 100644 --- a/include/blast/math/StorageOrder.hpp +++ b/include/blast/math/StorageOrder.hpp @@ -1,28 +1,16 @@ -// Copyright 2023 Mikhail Katliar -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. +// Copyright 2023-2024 Mikhail Katliar. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. #pragma once -#include - namespace blast { enum StorageOrder : bool { - rowMajor = blaze::rowMajor, - columnMajor = blaze::columnMajor + rowMajor = false, + columnMajor = true }; @@ -30,4 +18,4 @@ namespace blast { return so == rowMajor ? columnMajor : rowMajor; } -} \ No newline at end of file +}