diff --git a/include/blast/math/dense/StaticMatrix.hpp b/include/blast/math/dense/StaticMatrix.hpp new file mode 100644 index 00000000..f5e277a3 --- /dev/null +++ b/include/blast/math/dense/StaticMatrix.hpp @@ -0,0 +1,176 @@ +// Copyright (c) 2019-2020 Mikhail Katliar All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#pragma once + +#include +#include +#include +#include +#include +#include +#include + +#include + + +namespace blast +{ + /// @brief Matrix with statically defined size. + /// + /// @tparam T element type of the matrix + /// @tparam M number of rows + /// @tparam N number of columns + /// @tparam SO storage order + template + class StaticMatrix + { + public: + using ElementType = T; + static bool constexpr storageOrder = SO; + + + StaticMatrix() noexcept + { + // Initialize padding elements to 0 to prevent denorms in calculations. + // Denorms can significantly impair performance, see https://github.com/giaf/blasfeo/issues/103 + std::fill_n(v_, capacity_, T {}); + } + + + StaticMatrix(T const& v) noexcept + { + std::fill_n(v_, capacity_, v); + } + + + constexpr StaticMatrix(std::initializer_list> list) + { + std::fill_n(v_, capacity_, T {}); + + if (list.size() != M || determineColumns(list) > N) + throw std::invalid_argument {"Invalid setup of static matrix"}; + + size_t i = 0; + + for (auto const& row : list) + { + size_t j = 0; + + for (const auto& element : row) + { + v_[elementIndex(i, j)] = element; + ++j; + } + + ++i; + } + } + + + StaticMatrix& operator=(T val) noexcept + { + for (size_t i = 0; i < M; ++i) + for (size_t j = 0; j < N; ++j) + (*this)(i, j) = val; + + return *this; + } + + + constexpr T const& operator()(size_t i, size_t j) const noexcept + { + return v_[elementIndex(i, j)]; + } + + + constexpr T& operator()(size_t i, size_t j) + { + return v_[elementIndex(i, j)]; + } + + + static size_t constexpr rows() noexcept + { + return M; + } + + + static size_t constexpr columns() noexcept + { + return N; + } + + + static size_t constexpr spacing() noexcept + { + return spacing_; + } + + + T * data() noexcept + { + return v_; + } + + + T const * data() const noexcept + { + return v_; + } + + + private: + static size_t constexpr spacing_ = nextMultiple(SO == columnMajor ? M : N, SimdSize_v); + static size_t constexpr capacity_ = spacing_ * (SO == columnMajor ? N : M); + + // Alignment of the data elements. + static size_t constexpr alignment_ = CACHE_LINE_SIZE; + + // Aligned element storage. + alignas(alignment_) T v_[capacity_]; + + + size_t elementIndex(size_t i, size_t j) const + { + return SO == columnMajor ? i + spacing_ * j : spacing_ * i + j; + } + }; + + + template + inline size_t constexpr rows(StaticMatrix const& m) noexcept + { + return m.rows(); + } + + + template + inline size_t constexpr columns(StaticMatrix const& m) noexcept + { + return m.columns(); + } + + + template + inline constexpr T * data(StaticMatrix& m) noexcept + { + return m.data(); + } + + + template + inline constexpr T const * data(StaticMatrix const& m) noexcept + { + return m.data(); + } + + + template + struct IsDenseMatrix> : std::true_type {}; + + + template + struct IsStatic> : std::true_type {}; +} diff --git a/include/blast/math/expressions/PanelMatrix.hpp b/include/blast/math/expressions/PanelMatrix.hpp index d0494887..80de78cf 100644 --- a/include/blast/math/expressions/PanelMatrix.hpp +++ b/include/blast/math/expressions/PanelMatrix.hpp @@ -5,6 +5,7 @@ #pragma once #include +#include #include #include #include @@ -21,12 +22,9 @@ namespace blast { - using namespace blaze; - - template struct PanelMatrix - : public Matrix + : public blaze::Matrix { public: using TagType = Group0; @@ -80,14 +78,14 @@ namespace blast template inline auto assign(PanelMatrix& lhs, blaze::DMatTDMatMultExpr const& rhs) - -> blaze::EnableIf_t && IsRowMajorMatrix_v && IsPanelMatrix_v && IsRowMajorMatrix_v> + -> blaze::EnableIf_t && blaze::IsRowMajorMatrix_v && IsPanelMatrix_v && blaze::IsRowMajorMatrix_v> { BLAZE_THROW_LOGIC_ERROR("Not implemented 2"); } template - inline void assign(DenseMatrix& lhs, PanelMatrix const& rhs) + inline void assign(blaze::DenseMatrix& lhs, PanelMatrix const& rhs) { BLAZE_INTERNAL_ASSERT( (*lhs).rows() == (*rhs).rows() , "Invalid number of rows" ); BLAZE_INTERNAL_ASSERT( (*lhs).columns() == (*rhs).columns(), "Invalid number of columns" ); @@ -176,7 +174,7 @@ namespace blast template - inline void assign(PanelMatrix& lhs, DenseMatrix const& rhs) + inline void assign(PanelMatrix& lhs, blaze::DenseMatrix const& rhs) { size_t const m = (*rhs).rows(); size_t const n = (*rhs).columns(); diff --git a/include/blast/math/reference/Ger.hpp b/include/blast/math/reference/Ger.hpp new file mode 100644 index 00000000..097856e9 --- /dev/null +++ b/include/blast/math/reference/Ger.hpp @@ -0,0 +1,21 @@ +// Copyright (c) 2019-2024 Mikhail Katliar All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. +#pragma once + +#include +#include +#include + + +namespace blast :: reference +{ + template + requires VectorPointer && VectorPointer && MatrixPointer + inline void ger(size_t m, size_t n, Real alpha, VPX x, VPY y, MPA a) + { + for (size_t i = 0; i < m; ++i) + for (size_t j = 0; j < n; ++j) + *a(i, j) += alpha * *x(i) * *y(j); + } +} diff --git a/include/blast/math/register_matrix/DynamicRegisterMatrix.hpp b/include/blast/math/register_matrix/DynamicRegisterMatrix.hpp index 9cd7f826..caa0eeb3 100644 --- a/include/blast/math/register_matrix/DynamicRegisterMatrix.hpp +++ b/include/blast/math/register_matrix/DynamicRegisterMatrix.hpp @@ -6,14 +6,16 @@ #include #include +#include #include #include +#include #include +#include #include #include #include -#include #include #include @@ -31,12 +33,12 @@ namespace blast /// @tparam SO orientation of SIMD registers. template class DynamicRegisterMatrix - : public Matrix, SO> + : public blaze::Matrix, SO> { public: static_assert(SO == columnMajor, "Only column-major register matrices are currently supported"); - using BaseType = Matrix, SO>; + using BaseType = blaze::Matrix, SO>; using BaseType::storageOrder; /// @brief Type of matrix elements @@ -135,7 +137,7 @@ namespace blast /// @brief R(0:m-1, 0:n-1) += beta * A template - requires MatrixPointer && (PA::storageOrder == columnMajor) + requires MatrixPointer && (PA::storageOrder == columnMajor) void axpy(T beta, PA a) noexcept { #pragma unroll @@ -148,25 +150,25 @@ namespace blast /// @brief Load from memory template - requires MatrixPointer && (P::storageOrder == columnMajor) + requires MatrixPointer && (P::storageOrder == columnMajor) void load(P p) noexcept; /// @brief Load from memory template - requires MatrixPointer && (P::storageOrder == columnMajor) + requires MatrixPointer && (P::storageOrder == columnMajor) void load(T beta, P p) noexcept; /// @brief Store matrix at location pointed by \a p template - requires MatrixPointer && (P::storageOrder == columnMajor) + requires MatrixPointer && (P::storageOrder == columnMajor) void store(P p) const noexcept; /// @brief Store lower-triangular part of the matrix at location pointed by \a p. template - requires MatrixPointer && (P::storageOrder == columnMajor) + requires MatrixPointer && (P::storageOrder == columnMajor) void storeLower(P p) const noexcept; @@ -178,7 +180,7 @@ namespace blast /// @param a pointer to the first element of the first matrix. /// @param b pointer to the first element of the second matrix. template - requires MatrixPointer && (PA::storageOrder == columnMajor) && MatrixPointer + requires MatrixPointer && (PA::storageOrder == columnMajor) && MatrixPointer void ger(T alpha, PA a, PB b) noexcept; @@ -189,7 +191,7 @@ namespace blast /// @param a pointer to the first element of the first matrix. /// @param b pointer to the first element of the second matrix. template - requires MatrixPointer && (PA::storageOrder == columnMajor) && MatrixPointer + requires MatrixPointer && (PA::storageOrder == columnMajor) && MatrixPointer void ger(PA a, PB b) noexcept; @@ -202,7 +204,7 @@ namespace blast /// @brief a pointer to a triangular matrix /// template - requires MatrixPointer + requires MatrixPointer void trsmRightUpper(P a); @@ -222,7 +224,7 @@ namespace blast /// @param b general matrix. /// template - requires MatrixPointer && (P1::storageOrder == columnMajor) && MatrixPointer + requires MatrixPointer && (P1::storageOrder == columnMajor) && MatrixPointer void trmmLeftUpper(T alpha, P1 a, P2 b) noexcept; @@ -242,7 +244,7 @@ namespace blast /// @param b general matrix. /// template - requires MatrixPointer && (P1::storageOrder == columnMajor) && MatrixPointer + requires MatrixPointer && (P1::storageOrder == columnMajor) && MatrixPointer void trmmRightLower(T alpha, P1 a, P2 b) noexcept; @@ -289,7 +291,7 @@ namespace blast template template - requires MatrixPointer && (P::storageOrder == columnMajor) + requires MatrixPointer && (P::storageOrder == columnMajor) inline void DynamicRegisterMatrix::load(P p) noexcept { #pragma unroll @@ -302,7 +304,7 @@ namespace blast template template - requires MatrixPointer && (P::storageOrder == columnMajor) + requires MatrixPointer && (P::storageOrder == columnMajor) inline void DynamicRegisterMatrix::load(T beta, P p) noexcept { #pragma unroll @@ -315,7 +317,7 @@ namespace blast template template - requires MatrixPointer && (P::storageOrder == columnMajor) + requires MatrixPointer && (P::storageOrder == columnMajor) inline void DynamicRegisterMatrix::store(P p) const noexcept { // The compile-time constant size of the j loop in combination with the if() expression @@ -337,7 +339,7 @@ namespace blast template template - requires MatrixPointer && (P::storageOrder == columnMajor) + requires MatrixPointer && (P::storageOrder == columnMajor) inline void DynamicRegisterMatrix::storeLower(P p) const noexcept { for (size_t j = 0; j < N; ++j) if (j < n_) @@ -393,7 +395,7 @@ namespace blast template template - requires MatrixPointer && (PA::storageOrder == columnMajor) && MatrixPointer + requires MatrixPointer && (PA::storageOrder == columnMajor) && MatrixPointer BLAZE_ALWAYS_INLINE void DynamicRegisterMatrix::ger(T alpha, PA a, PB b) noexcept { SimdVecType ax[RM]; @@ -416,7 +418,7 @@ namespace blast template template - requires MatrixPointer && (PA::storageOrder == columnMajor) && MatrixPointer + requires MatrixPointer && (PA::storageOrder == columnMajor) && MatrixPointer BLAZE_ALWAYS_INLINE void DynamicRegisterMatrix::ger(PA a, PB b) noexcept { SimdVecType ax[RM]; @@ -550,23 +552,23 @@ namespace blast // } - template - inline bool operator==(DynamicRegisterMatrix const& rm, Matrix const& m) + template + inline bool operator==(DynamicRegisterMatrix const& rm, MT const& m) { if (rows(m) != rm.rows() || columns(m) != rm.columns()) return false; for (size_t i = 0; i < rm.rows(); ++i) for (size_t j = 0; j < rm.columns(); ++j) - if (rm(i, j) != (*m)(i, j)) + if (rm(i, j) != m(i, j)) return false; return true; } - template - inline bool operator==(Matrix const& m, DynamicRegisterMatrix const& rm) + template + inline bool operator==(MT const& m, DynamicRegisterMatrix const& rm) { return rm == m; } @@ -576,9 +578,9 @@ namespace blast typename T, size_t M, size_t N, bool SO, typename PA, typename PB, typename PC, typename PD > - requires MatrixPointer && (PA::storageOrder == columnMajor) - && MatrixPointer - && MatrixPointer && (PC::storageOrder == columnMajor) + requires MatrixPointer && (PA::storageOrder == columnMajor) + && MatrixPointer + && MatrixPointer && (PC::storageOrder == columnMajor) BLAZE_ALWAYS_INLINE void gemm(DynamicRegisterMatrix& ker, size_t K, T alpha, PA a, PB b, T beta, PC c, PD d) noexcept { diff --git a/include/blast/math/register_matrix/RegisterMatrix.hpp b/include/blast/math/register_matrix/RegisterMatrix.hpp index 631b530d..156031c0 100644 --- a/include/blast/math/register_matrix/RegisterMatrix.hpp +++ b/include/blast/math/register_matrix/RegisterMatrix.hpp @@ -7,9 +7,12 @@ #include #include #include +#include #include #include #include +#include +#include #include #include @@ -22,9 +25,6 @@ namespace blast { - using namespace blaze; - - /// @brief Register-resident matrix /// /// The RegisterMatrix class provides basic linear algebra operations that can be performed @@ -39,12 +39,12 @@ namespace blast /// template class RegisterMatrix - : public Matrix, SO> + : public blaze::Matrix, SO> { public: static_assert(SO == columnMajor, "Only column-major register matrices are currently supported"); - using BaseType = Matrix, SO>; + using BaseType = blaze::Matrix, SO>; using BaseType::storageOrder; /// @brief Type of matrix elements @@ -131,7 +131,7 @@ namespace blast /// @brief R += beta * A template - requires MatrixPointer && (PA::storageOrder == columnMajor) + requires MatrixPointer && (PA::storageOrder == columnMajor) void axpy(T beta, PA a) noexcept { SimdVecType const beta_simd {beta}; @@ -146,7 +146,7 @@ namespace blast /// @brief R(0:m-1, 0:n-1) += beta * A template - requires MatrixPointer && (PA::storageOrder == columnMajor) + requires MatrixPointer && (PA::storageOrder == columnMajor) void axpy(T beta, PA a, size_t m, size_t n) noexcept { SimdVecType const beta_simd {beta}; @@ -191,7 +191,7 @@ namespace blast /// @brief Store lower-triangular part of the matrix at location pointed by \a p. template - requires MatrixPointer && (P::storageOrder == columnMajor) + requires MatrixPointer && (P::storageOrder == columnMajor) void storeLower(P p) const noexcept; @@ -466,7 +466,7 @@ namespace blast template template - requires MatrixPointer && (P::storageOrder == columnMajor) + requires MatrixPointer && (P::storageOrder == columnMajor) inline void RegisterMatrix::store(P p) const noexcept { #pragma unroll @@ -479,7 +479,7 @@ namespace blast template template - requires MatrixPointer && (P::storageOrder == columnMajor) + requires MatrixPointer && (P::storageOrder == columnMajor) inline void RegisterMatrix::store(P p, size_t m, size_t n) const noexcept { // The compile-time constant size of the j loop in combination with the if() expression @@ -501,7 +501,7 @@ namespace blast template template - requires MatrixPointer && (P::storageOrder == columnMajor) + requires MatrixPointer && (P::storageOrder == columnMajor) inline void RegisterMatrix::storeLower(P p) const noexcept { for (size_t j = 0; j < N; ++j) @@ -524,7 +524,7 @@ namespace blast template template - requires MatrixPointer && (P::storageOrder == columnMajor) + requires MatrixPointer && (P::storageOrder == columnMajor) inline void RegisterMatrix::storeLower(P p, size_t m, size_t n) const noexcept { for (size_t j = 0; j < N; ++j) if (j < n) @@ -802,23 +802,23 @@ namespace blast } - template - inline bool operator==(RegisterMatrix const& rm, Matrix const& m) + template + inline bool operator==(RegisterMatrix const& rm, MT const& m) { if (rows(m) != rm.rows() || columns(m) != rm.columns()) return false; for (size_t i = 0; i < rm.rows(); ++i) for (size_t j = 0; j < rm.columns(); ++j) - if (rm(i, j) != (*m)(i, j)) + if (rm(i, j) != m(i, j)) return false; return true; } - template - inline bool operator==(Matrix const& m, RegisterMatrix const& rm) + template + inline bool operator==(MT const& m, RegisterMatrix const& rm) { return rm == m; } diff --git a/include/blast/util/NextMultiple.hpp b/include/blast/util/NextMultiple.hpp new file mode 100644 index 00000000..8d9fa47e --- /dev/null +++ b/include/blast/util/NextMultiple.hpp @@ -0,0 +1,23 @@ +#pragma once + +#include + + +namespace blast +{ + /** + * @brief Rounds up an integral value to the next multiple of a given factor. + // + // @param value The integral value to be rounded up \f$[1..\infty)\f$. + // @param factor The factor of the multiple \f$[1..\infty)\f$. + // @return The next multiple of the given factor. + // + // This function rounds up the given integral value to the next multiple of the given integral + // factor. In case the integral value is already a multiple of the given factor, the value itself + // is returned. + */ + inline size_t constexpr nextMultiple(size_t value, size_t factor) noexcept + { + return value + (factor - value % factor) % factor; + } +} diff --git a/test/blast/math/simd/RegisterMatrixTest.cpp b/test/blast/math/simd/RegisterMatrixTest.cpp index a09865f1..606c97d3 100644 --- a/test/blast/math/simd/RegisterMatrixTest.cpp +++ b/test/blast/math/simd/RegisterMatrixTest.cpp @@ -3,18 +3,17 @@ // license that can be found in the LICENSE file. #include -#include +#include #include #include #include #include +#include #include #include #include -#include - namespace blast :: testing { @@ -133,7 +132,7 @@ namespace blast :: testing StaticMatrix A; for (size_t i = 0; i < rows(A); ++i) for (size_t j = 0; j < columns(A); ++j) - (*A)(i, j) = 1000 * i + j; + A(i, j) = 1000 * i + j; for (size_t m = 1; m <= rows(A); ++m) { @@ -181,7 +180,7 @@ namespace blast :: testing using ET = ElementType_t; StaticMatrix A, B(0.); - randomize(A); + blast::randomize(A); RM ker; ker.load(1., ptr(A, 0, 0)); @@ -379,7 +378,9 @@ namespace blast :: testing ker.load(1., ptr(C)); ker.ger(alpha, ptr(a), ptr(b)); - BLAST_EXPECT_APPROX_EQ(ker, evaluate(C + alpha * a * b), absTol(), relTol()); + reference::ger(rows(C), columns(C), alpha, ptr(a), ptr(b), ptr(C)); + + BLAST_EXPECT_APPROX_EQ(ker, C, absTol(), relTol()); } @@ -643,4 +644,4 @@ namespace blast :: testing // TODO: should be strictly equal? BLAST_ASSERT_APPROX_EQ(ker, alpha * B * A, absTol(), relTol()); } -} \ No newline at end of file +}