From 2cb94d5a8c776063acff5550791fd9853c94638e Mon Sep 17 00:00:00 2001 From: knliege Date: Tue, 2 Apr 2024 22:48:00 -0600 Subject: [PATCH 1/2] Fix BTDS on MI210 without GPU aware MPI --- .../src/Ifpack2_BlockTriDiContainer_impl.hpp | 83 +++++++++++++++---- 1 file changed, 66 insertions(+), 17 deletions(-) diff --git a/packages/ifpack2/src/Ifpack2_BlockTriDiContainer_impl.hpp b/packages/ifpack2/src/Ifpack2_BlockTriDiContainer_impl.hpp index 4804ce3099b7..aa9c17ddd587 100644 --- a/packages/ifpack2/src/Ifpack2_BlockTriDiContainer_impl.hpp +++ b/packages/ifpack2/src/Ifpack2_BlockTriDiContainer_impl.hpp @@ -300,6 +300,7 @@ namespace Ifpack2 { #else using impl_scalar_type_1d_view = typename impl_type::impl_scalar_type_1d_view; #endif + using impl_scalar_type_1d_view_host = Kokkos::View; using impl_scalar_type_2d_view = typename impl_type::impl_scalar_type_2d_view; using impl_scalar_type_2d_view_tpetra = typename impl_type::impl_scalar_type_2d_view_tpetra; @@ -322,6 +323,7 @@ namespace Ifpack2 { SendRecvPair offset_host; // offsets to local id list and data buffer SendRecvPair lids; // local id list SendRecvPair buffer; // data buffer + SendRecvPair buffer_host; // data buffer local_ordinal_type_1d_view dm2cm; // permutation @@ -478,6 +480,11 @@ namespace Ifpack2 { buffer.send = impl_scalar_type_1d_view(do_not_initialize_tag("buffer send"), send_buffer_size); buffer.recv = impl_scalar_type_1d_view(do_not_initialize_tag("buffer recv"), recv_buffer_size); + + if (!Tpetra::Details::Behavior::assumeMpiIsGPUAware()) { + buffer_host.send = impl_scalar_type_1d_view_host(do_not_initialize_tag("buffer send"), send_buffer_size); + buffer_host.recv = impl_scalar_type_1d_view_host(do_not_initialize_tag("buffer recv"), recv_buffer_size); + } } } @@ -558,15 +565,12 @@ namespace Ifpack2 { &reqs.recv[i]); } else { - const auto buffer_recv_host = Kokkos::create_mirror_view( - Kokkos::view_alloc(Kokkos::WithoutInitializing), buffer.recv); irecv(comm, - reinterpret_cast(buffer_recv_host.data() + offset_host.recv[i]*mv_blocksize), + reinterpret_cast(buffer_host.recv.data() + offset_host.recv[i]*mv_blocksize), (offset_host.recv[i+1] - offset_host.recv[i])*mv_blocksize*sizeof(impl_scalar_type), pids.recv[i], 42, &reqs.recv[i]); - Kokkos::deep_copy(buffer.recv, buffer_recv_host); } } @@ -582,7 +586,21 @@ namespace Ifpack2 { mv, blocksize, //execution_space()); exec_instances[i%8]); - + if (!Tpetra::Details::Behavior::assumeMpiIsGPUAware()) { + //if (i<8) exec_instances[i%8].fence(); + const local_ordinal_type num_vectors = mv.extent(1); + const local_ordinal_type mv_blocksize = blocksize*num_vectors; + + Kokkos::deep_copy(exec_instances[i%8], + Kokkos::subview(buffer_host.send, + Kokkos::pair( + offset_host.send(i)*mv_blocksize, + offset_host.send(i+1)*mv_blocksize)), + Kokkos::subview(buffer.send, + Kokkos::pair( + offset_host.send(i)*mv_blocksize, + offset_host.send(i+1)*mv_blocksize))); + } } /// somehow one unit test fails when we use exec_instance[i%8] //execution_space().fence(); @@ -598,11 +616,8 @@ namespace Ifpack2 { &reqs.send[i]); } else { - const auto buffer_send_host = Kokkos::create_mirror_view( - Kokkos::view_alloc(Kokkos::WithoutInitializing), buffer.send); - Kokkos::deep_copy(buffer_send_host, buffer.send); isend(comm, - reinterpret_cast(buffer_send_host.data() + offset_host.send[i]*mv_blocksize), + reinterpret_cast(buffer_host.send.data() + offset_host.send[i]*mv_blocksize), (offset_host.send[i+1] - offset_host.send[i])*mv_blocksize*sizeof(impl_scalar_type), pids.send[i], 42, @@ -630,6 +645,21 @@ namespace Ifpack2 { // 0.0. wait any waitany(pids.recv.extent(0), reqs.recv.data(), &idx); + if (!Tpetra::Details::Behavior::assumeMpiIsGPUAware()) { + const local_ordinal_type num_vectors = remote_multivector.extent(1); + const local_ordinal_type mv_blocksize = blocksize*num_vectors; + + Kokkos::deep_copy( + Kokkos::subview(buffer.recv, + Kokkos::pair( + offset_host.recv(idx)*mv_blocksize, + offset_host.recv(idx+1)*mv_blocksize)), + Kokkos::subview(buffer_host.recv, + Kokkos::pair( + offset_host.recv(idx)*mv_blocksize, + offset_host.recv(idx+1)*mv_blocksize))); + } + // 0.1. unpack data after data is moved into a device copy(lids.recv, buffer.recv, offset_host.recv(idx), offset_host.recv(idx+1), @@ -731,15 +761,12 @@ namespace Ifpack2 { &reqs.recv[i]); } else { - const auto buffer_recv_host = Kokkos::create_mirror_view( - Kokkos::view_alloc(Kokkos::WithoutInitializing), buffer.recv); irecv(comm, - reinterpret_cast(buffer_recv_host.data() + offset_host.recv[i]*mv_blocksize), + reinterpret_cast(buffer_host.recv.data() + offset_host.recv[i]*mv_blocksize), (offset_host.recv[i+1] - offset_host.recv[i])*mv_blocksize*sizeof(impl_scalar_type), pids.recv[i], 42, &reqs.recv[i]); - Kokkos::deep_copy(buffer.recv, buffer_recv_host); } } @@ -757,11 +784,20 @@ namespace Ifpack2 { &reqs.send[i]); } else { - const auto buffer_send_host = Kokkos::create_mirror_view( - Kokkos::view_alloc(Kokkos::WithoutInitializing), buffer.send); - Kokkos::deep_copy(buffer_send_host, buffer.send); + const local_ordinal_type num_vectors = mv.extent(1); + const local_ordinal_type mv_blocksize = blocksize*num_vectors; + + Kokkos::deep_copy( + Kokkos::subview(buffer_host.send, + Kokkos::pair( + offset_host.send(i)*mv_blocksize, + offset_host.send(i+1)*mv_blocksize)), + Kokkos::subview(buffer.send, + Kokkos::pair( + offset_host.send(i)*mv_blocksize, + offset_host.send(i+1)*mv_blocksize))); isend(comm, - reinterpret_cast(buffer_send_host.data() + offset_host.send[i]*mv_blocksize), + reinterpret_cast(buffer_host.send.data() + offset_host.send[i]*mv_blocksize), (offset_host.send[i+1] - offset_host.send[i])*mv_blocksize*sizeof(impl_scalar_type), pids.send[i], 42, @@ -787,6 +823,19 @@ namespace Ifpack2 { for (local_ordinal_type i=0,iend=pids.recv.extent(0);i( + offset_host.recv(idx)*mv_blocksize, + offset_host.recv(idx+1)*mv_blocksize)), + Kokkos::subview(buffer_host.recv, + Kokkos::pair( + offset_host.recv(idx)*mv_blocksize, + offset_host.recv(idx+1)*mv_blocksize))); + } copy(lids.recv, buffer.recv, offset_host.recv(idx), offset_host.recv(idx+1), remote_multivector, blocksize); } From 1bc2ac3a8107f0e32d92c11fc6f4c24ee638e39f Mon Sep 17 00:00:00 2001 From: kliegeois Date: Wed, 3 Apr 2024 17:05:54 -0600 Subject: [PATCH 2/2] Fix -Werror=shadow --- packages/ifpack2/src/Ifpack2_BlockTriDiContainer_impl.hpp | 3 --- 1 file changed, 3 deletions(-) diff --git a/packages/ifpack2/src/Ifpack2_BlockTriDiContainer_impl.hpp b/packages/ifpack2/src/Ifpack2_BlockTriDiContainer_impl.hpp index aa9c17ddd587..8d47c85d1a50 100644 --- a/packages/ifpack2/src/Ifpack2_BlockTriDiContainer_impl.hpp +++ b/packages/ifpack2/src/Ifpack2_BlockTriDiContainer_impl.hpp @@ -784,9 +784,6 @@ namespace Ifpack2 { &reqs.send[i]); } else { - const local_ordinal_type num_vectors = mv.extent(1); - const local_ordinal_type mv_blocksize = blocksize*num_vectors; - Kokkos::deep_copy( Kokkos::subview(buffer_host.send, Kokkos::pair(