Skip to content

Commit

Permalink
Fix gemm() performance for panel matrices (#43)
Browse files Browse the repository at this point in the history
* Correct alignment deduction for transposed panel matrices

* Implement missing IsAligned and IsPadded type traits specializations

* PanelPotrfTest decoupled from Blaze

* PanelGemmTest decoupled from Blaze

* Conditional use of unaligned matrix pointers in gemm(RegisterMatrix<...>, ...)

* Fixed a typo

* RegisterMatrixTest passes when compiled with GCC

* Static panel matrix pointer benchmark

* Added operator[] to matrix and vector pointers

* Increased inlining threshold for Clang

* Removed broadcast() from matrix and vector pointers

* Explicitly convert B pointer to unaligned in gemm()
  • Loading branch information
mkatliar authored Oct 15, 2024
1 parent 0daa999 commit 94d8a2f
Show file tree
Hide file tree
Showing 34 changed files with 393 additions and 165 deletions.
2 changes: 1 addition & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ cmake_minimum_required(VERSION 3.10) # Need at least 3.10 for gtest_discover_t
project(blast VERSION 0.1 LANGUAGES CXX)

# Enable modern C++
set(CMAKE_CXX_STANDARD 20)
set(CMAKE_CXX_STANDARD 23)

# Allow for integration with other tools such as Intellisense
set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
Expand Down
1 change: 1 addition & 0 deletions bench/blast/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ add_executable(bench-blast
math/panel/DynamicGemm.cpp
math/panel/StaticPotrf.cpp
math/panel/DynamicPotrf.cpp
math/panel/StaticMatrixPointer.cpp
)

target_compile_definitions(bench-blast
Expand Down
34 changes: 34 additions & 0 deletions bench/blast/math/panel/StaticMatrixPointer.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
// Copyright (c) 2019-2020 Mikhail Katliar All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

#include <blast/math/StaticPanelMatrix.hpp>

#include <bench/Benchmark.hpp>


namespace blast :: benchmark
{
/**
 * @brief Benchmark of element-wise indexing through a static panel matrix pointer.
 *
 * Obtains a matrix pointer to element (0, 0) of an 8x4 column-major
 * @a StaticPanelMatrix via ptr<AF>() and, on every benchmark iteration,
 * evaluates pA(i, j) for all 0 <= i < M, 0 <= j < N, passing each result
 * to DoNotOptimize() so that the call is not discarded by the optimizer.
 *
 * @tparam Real element type of the matrix
 * @tparam AF alignment flag passed to ptr<AF>()
 *
 * @param state benchmark state object driving the measurement loop
 */
template <typename Real, AlignmentFlag AF>
static void BM_static_panel_matrix_pointer(State& state)
{
// Matrix dimensions are fixed at compile time.
size_t constexpr M = 8;
size_t constexpr N = 4;

// NOTE(review): A is default-constructed and never written to in this function;
// whether its elements are initialized depends on StaticPanelMatrix's default constructor - confirm.
StaticPanelMatrix<Real, M, N, columnMajor> A;
auto pA = ptr<AF>(A, 0, 0);

for (auto _ : state)
{
// Visit every (i, j) index pair; presumably pA(i, j) yields the pointer
// offset by (i, j) rather than the element value - TODO confirm.
for (size_t i = 0; i < M; ++i)
for (size_t j = 0; j < N; ++j)
DoNotOptimize(pA(i, j));
}
}


// Instantiate the benchmark for both element types and both alignment flags.
BENCHMARK_TEMPLATE(BM_static_panel_matrix_pointer, double, aligned);
BENCHMARK_TEMPLATE(BM_static_panel_matrix_pointer, double, unaligned);
BENCHMARK_TEMPLATE(BM_static_panel_matrix_pointer, float, aligned);
BENCHMARK_TEMPLATE(BM_static_panel_matrix_pointer, float, unaligned);
}
2 changes: 1 addition & 1 deletion bench/common/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,6 @@ target_link_libraries(bench-blast-common
if (CMAKE_CXX_COMPILER_ID STREQUAL "Clang")
# More aggressive inlining with Clang
target_compile_options(bench-blast-common
PUBLIC "-mllvm" "-inline-threshold=1000"
PUBLIC "-mllvm" "-inline-threshold=4000"
)
endif()
2 changes: 2 additions & 0 deletions include/blast/blaze/math/TypeTraits.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,5 +9,7 @@
#include <blast/blaze/math/typetraits/IsStaticallySpaced.hpp>
#include <blast/blaze/math/typetraits/IsDenseVector.hpp>
#include <blast/blaze/math/typetraits/IsDenseMatrix.hpp>
#include <blast/blaze/math/typetraits/IsAligned.hpp>
#include <blast/blaze/math/typetraits/IsPadded.hpp>
#include <blast/blaze/math/typetraits/Spacing.hpp>
#include <blast/blaze/math/typetraits/StorageOrder.hpp>
24 changes: 24 additions & 0 deletions include/blast/blaze/math/typetraits/IsAligned.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
// Copyright 2024 Mikhail Katliar. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

#pragma once

#include <blast/math/typetraits/IsAligned.hpp>

#include <blaze/math/typetraits/IsVector.h>
#include <blaze/math/typetraits/IsMatrix.h>
#include <blaze/math/typetraits/IsAligned.h>


namespace blast
{
/**
 * @brief Specialization for Blaze matrix and vector types
 *
 * Applies to every @a T for which blaze::IsVector_v<T> or blaze::IsMatrix_v<T>
 * is true, and inherits its result from blaze::IsAligned<T>.
 *
 * @tparam T matrix or vector type
 */
template <typename T>
requires blaze::IsVector_v<T> || blaze::IsMatrix_v<T>
struct IsAligned<T> : blaze::IsAligned<T> {};
}
24 changes: 24 additions & 0 deletions include/blast/blaze/math/typetraits/IsPadded.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
// Copyright 2024 Mikhail Katliar. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

#pragma once

#include <blast/math/typetraits/IsPadded.hpp>

#include <blaze/math/typetraits/IsVector.h>
#include <blaze/math/typetraits/IsMatrix.h>
#include <blaze/math/typetraits/IsPadded.h>


namespace blast
{
/**
 * @brief Specialization for Blaze matrix and vector types
 *
 * Applies to every @a T for which blaze::IsVector_v<T> or blaze::IsMatrix_v<T>
 * is true, and inherits its result from blaze::IsPadded<T>.
 *
 * @tparam T matrix or vector type
 */
template <typename T>
requires blaze::IsVector_v<T> || blaze::IsMatrix_v<T>
struct IsPadded<T> : blaze::IsPadded<T> {};
}
19 changes: 7 additions & 12 deletions include/blast/math/DynamicPanelMatrix.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -329,8 +329,6 @@ namespace blaze
, bool SO >
void makePositiveDefinite( DynamicPanelMatrix<Type, SO>& matrix )
{
using blaze::randomize;

BLAZE_CONSTRAINT_MUST_BE_NUMERIC_TYPE( Type );

if( !isSquare( *matrix ) ) {
Expand All @@ -349,16 +347,13 @@ namespace blaze

gemm(A, trans(A), matrix, matrix);

// TODO: implement it as below after the matrix *= ctrans( matrix ) expression works.

// randomize( matrix );
// matrix *= ctrans( matrix );

// for( size_t i=0UL; i<n; ++i ) {
// matrix(i,i) += Type(n);
// }

BLAZE_INTERNAL_ASSERT( isHermitian( matrix ), "Non-symmetric matrix detected" );
// NOTE: if uncommented, the following line results in a compiler error:
// /usr/local/include/blaze/math/Matrix.h:203:13: note: candidate template ignored: invalid explicitly-specified argument for template parameter 'MT'
// 203 | inline bool isHermitian( const Matrix<MT,SO>& m )
// I could not figure out what causes it, but we are going to decouple DynamicPanelMatrix from Blaze,
// so this code will be gone anyway.
//
// BLAZE_INTERNAL_ASSERT( isHermitian( matrix ), "Non-symmetric matrix detected" );
}
/*! \endcond */
//*************************************************************************************************
Expand Down
32 changes: 9 additions & 23 deletions include/blast/math/RowColumnVectorPointer.hpp
Original file line number Diff line number Diff line change
@@ -1,16 +1,6 @@
// Copyright 2023 Mikhail Katliar
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// Copyright (c) 2019-2020 Mikhail Katliar All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

#pragma once

Expand Down Expand Up @@ -78,12 +68,6 @@ namespace blast
}


SimdVecType broadcast() const noexcept
{
return ptr_.broadcast();
}


void store(SimdVecType const& val) const noexcept
{
ptr_.store(transposeFlag, val);
Expand Down Expand Up @@ -123,13 +107,15 @@ namespace blast


/**
* @brief Get reference to the pointed value.
* @brief Access element at specified offset
*
* @return reference to the pointed value
* @param i offset
*
* @return reference to the element at specified offset
*/
ElementType& operator*() noexcept
ElementType& operator[](ptrdiff_t i) const noexcept
{
return *ptr_;
return transposeFlag == columnVector ? ptr_[i, 0] : ptr_[0, i];
}


Expand Down
4 changes: 2 additions & 2 deletions include/blast/math/algorithm/Gemm.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -52,11 +52,11 @@ namespace blast
M, N,
[&] (auto& ker, size_t i, size_t j)
{
gemm(ker, K, alpha, A(i, 0), B(0, j), beta, C(i, j), D(i, j));
gemm(ker, K, alpha, A(i, 0), (~B)(0, j), beta, C(i, j), D(i, j));
},
[&] (auto& ker, size_t i, size_t j, size_t m, size_t n)
{
gemm(ker, K, alpha, A(i, 0), B(0, j), beta, C(i, j), D(i, j), m, n);
gemm(ker, K, alpha, A(i, 0), (~B)(0, j), beta, C(i, j), D(i, j), m, n);
}
);
}
Expand Down
5 changes: 2 additions & 3 deletions include/blast/math/algorithm/Tile.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,7 @@
#endif

#include <blast/math/StorageOrder.hpp>

#include <cstdlib>
#include <blast/util/Types.hpp>


namespace blast
Expand Down Expand Up @@ -47,7 +46,7 @@ namespace blast
* @param f_partial functor to call on partial tiles
*/
template <typename ET, StorageOrder SO, typename FF, typename FP, typename Arch>
inline void tile(Arch arch, StorageOrder traversal_order, std::size_t m, std::size_t n, FF&& f_full, FP&& f_partial)
inline void tile(Arch arch, StorageOrder traversal_order, size_t m, size_t n, FF&& f_full, FP&& f_partial)
{
detail::tile<ET, SO>(arch, traversal_order, m, n, f_full, f_partial);
}
Expand Down
2 changes: 0 additions & 2 deletions include/blast/math/algorithm/arch/avx2/Tile.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,6 @@

#include <blast/math/Simd.hpp>

#include <cstdlib>


namespace blast :: detail
{
Expand Down
8 changes: 8 additions & 0 deletions include/blast/math/dense/DynamicMatrix.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -203,4 +203,12 @@ namespace blast

template <typename T, bool SO>
struct StorageOrderHelper<DynamicMatrix<T, SO>> : std::integral_constant<StorageOrder, StorageOrder(SO)> {};


/**
 * @brief Specialization for @a DynamicMatrix: always reports true.
 */
template <typename T, bool SO>
struct IsAligned<DynamicMatrix<T, SO>>
:   std::true_type
{
};


template <typename T, bool SO>
struct IsPadded<DynamicMatrix<T, SO>> : std::integral_constant<bool, true> {};
}
34 changes: 28 additions & 6 deletions include/blast/math/dense/DynamicMatrixPointer.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -81,12 +81,6 @@ namespace blast
}


SimdVecType broadcast() const noexcept
{
return *ptr_;
}


void store(SimdVecType const& val) const noexcept
{
val.store(ptr_, AF);
Expand Down Expand Up @@ -119,6 +113,20 @@ namespace blast
}


/**
 * @brief Access element at specified offset
 *
 * Dereferences the result of ptrOffset(i, j). The operator is const-qualified
 * but returns a non-const reference, so the element can be modified through a
 * const pointer object.
 *
 * NOTE: a two-argument operator[] is a C++23 feature (multidimensional subscript).
 *
 * @param i row offset
 * @param j column offset
 *
 * @return reference to the element at specified offset
 */
ElementType& operator[](ptrdiff_t i, ptrdiff_t j) const noexcept
{
return *ptrOffset(i, j);
}


/**
* @brief Get reference to the pointed value.
*
Expand Down Expand Up @@ -210,6 +218,20 @@ namespace blast
struct StorageOrderHelper<DynamicMatrixPointer<T, SO, AF, PF>> : std::integral_constant<StorageOrder, StorageOrder(SO)> {};


/**
 * @brief Alignment trait for @a DynamicMatrixPointer.
 *
 * The value equals the pointer's @a AF template argument.
 */
template <typename T, bool SO, bool AF, bool PF>
struct IsAligned<DynamicMatrixPointer<T, SO, AF, PF>>
:   std::bool_constant<AF>
{
};


/**
 * @brief Padding trait for @a DynamicMatrixPointer.
 *
 * The value equals the pointer's @a PF template argument.
 */
template <typename T, bool SO, bool AF, bool PF>
struct IsPadded<DynamicMatrixPointer<T, SO, AF, PF>>
:   std::bool_constant<PF>
{
};


template <typename T, bool SO, bool AF, bool PF>
BLAST_ALWAYS_INLINE auto trans(DynamicMatrixPointer<T, SO, AF, PF> const& p) noexcept
{
Expand Down
19 changes: 13 additions & 6 deletions include/blast/math/dense/DynamicVectorPointer.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -73,12 +73,6 @@ namespace blast
}


SimdVecType broadcast() const noexcept
{
return *ptr_;
}


void store(IntrinsicType val) const noexcept
{
// Non-optimized
Expand Down Expand Up @@ -120,6 +114,19 @@ namespace blast
}


/**
 * @brief Access element at specified offset
 *
 * Dereferences the result of ptrOffset(i). The operator is const-qualified
 * but returns a non-const reference, so the element can be modified through a
 * const pointer object.
 *
 * @param i offset
 *
 * @return reference to the element at specified offset
 */
ElementType& operator[](ptrdiff_t i) const noexcept
{
return *ptrOffset(i);
}


/**
* @brief Get reference to the pointed value.
*
Expand Down
9 changes: 9 additions & 0 deletions include/blast/math/dense/StaticMatrix.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
#include <blast/util/Types.hpp>

#include <initializer_list>
#include <type_traits>


namespace blast
Expand Down Expand Up @@ -214,4 +215,12 @@ namespace blast

template <typename T, size_t M, size_t N, bool SO>
struct StorageOrderHelper<StaticMatrix<T, M, N, SO>> : std::integral_constant<StorageOrder, StorageOrder(SO)> {};


/**
 * @brief Specialization for @a StaticMatrix: always reports true.
 */
template <typename T, size_t M, size_t N, bool SO>
struct IsAligned<StaticMatrix<T, M, N, SO>>
:   std::true_type
{
};


template <typename T, size_t M, size_t N, bool SO>
struct IsPadded<StaticMatrix<T, M, N, SO>> : std::integral_constant<bool, true> {};
}
Loading

0 comments on commit 94d8a2f

Please sign in to comment.