Skip to content

Commit

Permalink
Remove dynamic memory allocation inside tight loops in the Plaza refi…
Browse files Browse the repository at this point in the history
…nement code (#3151)

* Tidy up

* Step update

* More transition

* Updates

* Updates

* Updates

* Remove more dynamic memory allocation

* Tidy up

* Update cpp/dolfinx/refinement/plaza.cpp

Co-authored-by: Chris Richardson <[email protected]>

* Reduce array size

* Fix assertion

* Simplify triangle generation

* Update cpp/dolfinx/refinement/plaza.cpp

Co-authored-by: Chris Richardson <[email protected]>

---------

Co-authored-by: Chris Richardson <[email protected]>
  • Loading branch information
garth-wells and chrisrichardson authored Apr 24, 2024
1 parent a546611 commit d5fb3db
Show file tree
Hide file tree
Showing 4 changed files with 127 additions and 110 deletions.
75 changes: 42 additions & 33 deletions cpp/dolfinx/refinement/plaza.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -33,9 +33,9 @@ namespace
/// @param[in] uniform If true, the triangle is subdivided into four similar
/// sub-triangles.
/// @returns Local indices for each sub-divived triangle
std::vector<std::int32_t> get_triangles(std::span<const std::int64_t> indices,
const std::int32_t longest_edge,
bool uniform)
std::pair<std::array<std::int32_t, 12>, std::size_t>
get_triangles(std::span<const std::int64_t> indices,
const std::int32_t longest_edge, bool uniform)
{
// NOTE: The assumption below is based on the UFC ordering of a triangle, i.e.
// that the N-th edge of a triangle is the edge where the N-th vertex is not
Expand All @@ -53,39 +53,32 @@ std::vector<std::int32_t> get_triangles(std::span<const std::int64_t> indices,

// If all edges marked, consider uniform refinement
if (uniform and indices[e0] >= 0 and indices[e1] >= 0)
return {e0, e1, v2, e1, e2, v0, e2, e0, v1, e2, e1, e0};
return {{e0, e1, v2, e1, e2, v0, e2, e0, v1, e2, e1, e0}, 12};

// Break each half of triangle into one or two sub-triangles
std::vector<std::int32_t> tri_set;
if (indices[e0] >= 0)
tri_set = {e2, v2, e0, e2, e0, v1};
if (indices[e0] >= 0 and indices[e1] >= 0)
return {{e2, v2, e0, e2, e0, v1, e2, v2, e1, e2, e1, v0}, 12};
else if (indices[e0] >= 0 and indices[e1] < 0)
return {{e2, v2, e0, e2, e0, v1, e2, v2, v0}, 9};
else if (indices[e0] < 0 and indices[e1] >= 0)
return {{e2, v2, v1, e2, v2, e1, e2, e1, v0}, 9};
else
tri_set = {e2, v2, v1};

if (indices[e1] >= 0)
{
tri_set.insert(tri_set.end(), {e2, v2, e1});
tri_set.insert(tri_set.end(), {e2, e1, v0});
}
else
tri_set.insert(tri_set.end(), {e2, v2, v0});

return tri_set;
return {{e2, v2, v1, e2, v2, v0}, 6};
}

//-----------------------------------------------------------------------------
// 3D version of subdivision
std::vector<std::int32_t>
std::pair<std::array<std::int32_t, 32>, std::size_t>
get_tetrahedra(std::span<const std::int64_t> indices,
std::span<const std::int32_t> longest_edge)
{
// Connectivity matrix for ten possible points (4 vertices + 6 edge
// midpoints) ordered {v0, v1, v2, v3, e0, e1, e2, e3, e4, e5} Only need
// upper triangle, but sometimes it is easier just to insert both entries
// (j,i) and (i,j).
// midpoints) ordered {v0, v1, v2, v3, e0, e1, e2, e3, e4, e5} Only
// need upper triangle, but sometimes it is easier just to insert both
// entries (j,i) and (i,j).
bool conn[10][10] = {};

// Edge connectivity to vertices (and by extension facets)
static const std::int32_t edges[6][2]
constexpr std::int32_t edges[6][2]
= {{2, 3}, {1, 3}, {1, 2}, {0, 3}, {0, 2}, {0, 1}};

// Iterate through cell edges
Expand Down Expand Up @@ -148,38 +141,51 @@ get_tetrahedra(std::span<const std::int64_t> indices,
}

// Iterate through all possible new vertices
std::vector<std::int32_t> facet_set, tet_set;
std::array<std::int32_t, 32> tet_set;
tet_set.fill(-1);
std::size_t tet_set_size = 0;
for (std::int32_t i = 0; i < 10; ++i)
{
for (std::int32_t j = i + 1; j < 10; ++j)
{
if (conn[i][j])
{
facet_set.clear();
std::array<std::int32_t, 10> facet_set_b;
std::span<std::int32_t> facet_set(facet_set_b.data(), 0);
for (std::int32_t k = j + 1; k < 10; ++k)
{
if (conn[i][k] and conn[j][k])
{
// Note that i < j < m < k
for (const std::int32_t& m : facet_set)
for (std::int32_t m : facet_set)
{
if (conn[m][k])
tet_set.insert(tet_set.end(), {i, j, m, k});
facet_set.push_back(k);
{
assert(tet_set_size + 4 <= tet_set.size());
tet_set[tet_set_size++] = i;
tet_set[tet_set_size++] = j;
tet_set[tet_set_size++] = m;
tet_set[tet_set_size++] = k;
}
}

facet_set = std::span(facet_set.data(), facet_set.size() + 1);
facet_set.back() = k;
}
}
}
}
}

return tet_set;
return {std::move(tet_set), tet_set_size};
}
//-----------------------------------------------------------------------------
} // namespace

//-----------------------------------------------------------------------------
void plaza::impl::enforce_rules(MPI_Comm comm,
const graph::AdjacencyList<int>& shared_edges,
std::vector<std::int8_t>& marked_edges,
std::span<std::int8_t> marked_edges,
const mesh::Topology& topology,
std::span<const std::int32_t> long_edge)
{
Expand Down Expand Up @@ -242,15 +248,18 @@ void plaza::impl::enforce_rules(MPI_Comm comm,
}
}
//-----------------------------------------------------------------------------
std::vector<std::int32_t>
std::pair<std::array<std::int32_t, 32>, std::size_t>
plaza::impl::get_simplices(std::span<const std::int64_t> indices,
std::span<const std::int32_t> longest_edge, int tdim,
bool uniform)
{
if (tdim == 2)
{
assert(longest_edge.size() == 1);
return get_triangles(indices, longest_edge[0], uniform);
auto [_d, size] = get_triangles(indices, longest_edge[0], uniform);
std::array<std::int32_t, 32> d;
std::copy_n(_d.begin(), size, d.begin());
return {d, size};
}
else if (tdim == 3)
{
Expand Down
158 changes: 83 additions & 75 deletions cpp/dolfinx/refinement/plaza.h
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
// SPDX-License-Identifier: LGPL-3.0-or-later

#include "utils.h"
#include <cmath>
#include <cstdint>
#include <dolfinx/common/MPI.h>
#include <dolfinx/graph/AdjacencyList.h>
Expand Down Expand Up @@ -45,70 +46,73 @@ namespace impl
/// @param simplex_set - index into indices for each child cell
/// @return mapping from child to parent facets, using cell-local index
template <int tdim>
std::vector<std::int8_t>
compute_parent_facets(std::span<const std::int32_t> simplex_set)
auto compute_parent_facets(std::span<const std::int32_t> simplex_set)
{
static_assert(tdim == 2 or tdim == 3);
assert(simplex_set.size() % (tdim + 1) == 0);
using parent_facet_t
= std::conditional<tdim == 2, std::array<std::int8_t, 12>,
std::array<std::int8_t, 32>>::type;
parent_facet_t parent_facet;
parent_facet.fill(-1);
assert(simplex_set.size() <= parent_facet.size());

// Index lookups in 'indices' for the child vertices that occur on
// each parent facet in 2D and 3D. In 2D each edge has 3 child
// vertices, and in 3D each triangular facet has six child vertices.
constexpr std::array<std::array<int, 3>, 3> facet_table_2d{
{{1, 2, 3}, {0, 2, 4}, {0, 1, 5}}};

constexpr std::array<std::array<int, 6>, 4> facet_table_3d{
{{1, 2, 3, 4, 5, 6},
{0, 2, 3, 4, 7, 8},
{0, 1, 3, 5, 7, 9},
{0, 1, 2, 6, 8, 9}}};

const int ncells = simplex_set.size() / (tdim + 1);
for (int fpi = 0; fpi < (tdim + 1); ++fpi)
{
assert(simplex_set.size() % (tdim + 1) == 0);
std::vector<std::int8_t> parent_facet(simplex_set.size(), -1);

// Index lookups in 'indices' for the child vertices that occur on
// each parent facet in 2D and 3D. In 2D each edge has 3 child
// vertices, and in 3D each triangular facet has six child vertices.
constexpr std::array<std::array<int, 3>, 3> facet_table_2d{
{{1, 2, 3}, {0, 2, 4}, {0, 1, 5}}};

constexpr std::array<std::array<int, 6>, 4> facet_table_3d{
{{1, 2, 3, 4, 5, 6},
{0, 2, 3, 4, 7, 8},
{0, 1, 3, 5, 7, 9},
{0, 1, 2, 6, 8, 9}}};

const int ncells = simplex_set.size() / (tdim + 1);
for (int fpi = 0; fpi < (tdim + 1); ++fpi)
// For each child cell, consider all facets
for (int cc = 0; cc < ncells; ++cc)
{
// For each child cell, consider all facets
for (int cc = 0; cc < ncells; ++cc)
for (int fci = 0; fci < (tdim + 1); ++fci)
{
for (int fci = 0; fci < (tdim + 1); ++fci)
{
// Indices of all vertices on child facet, sorted
std::array<int, tdim> cf, set_output;
// Indices of all vertices on child facet, sorted
std::array<int, tdim> cf, set_output;

int num_common_vertices;
if constexpr (tdim == 2)
{
for (int j = 0; j < tdim; ++j)
cf[j] = simplex_set[cc * 3 + facet_table_2d[fci][j]];
std::sort(cf.begin(), cf.end());
auto it = std::set_intersection(
facet_table_2d[fpi].begin(), facet_table_2d[fpi].end(),
cf.begin(), cf.end(), set_output.begin());
num_common_vertices = std::distance(set_output.begin(), it);
}
else
{
for (int j = 0; j < tdim; ++j)
cf[j] = simplex_set[cc * 4 + facet_table_3d[fci][j]];
std::sort(cf.begin(), cf.end());
auto it = std::set_intersection(
facet_table_3d[fpi].begin(), facet_table_3d[fpi].end(),
cf.begin(), cf.end(), set_output.begin());
num_common_vertices = std::distance(set_output.begin(), it);
}
int num_common_vertices;
if constexpr (tdim == 2)
{
for (int j = 0; j < tdim; ++j)
cf[j] = simplex_set[cc * 3 + facet_table_2d[fci][j]];
std::sort(cf.begin(), cf.end());
auto it = std::set_intersection(facet_table_2d[fpi].begin(),
facet_table_2d[fpi].end(), cf.begin(),
cf.end(), set_output.begin());
num_common_vertices = std::distance(set_output.begin(), it);
}
else
{
for (int j = 0; j < tdim; ++j)
cf[j] = simplex_set[cc * 4 + facet_table_3d[fci][j]];
std::sort(cf.begin(), cf.end());
auto it = std::set_intersection(facet_table_3d[fpi].begin(),
facet_table_3d[fpi].end(), cf.begin(),
cf.end(), set_output.begin());
num_common_vertices = std::distance(set_output.begin(), it);
}

if (num_common_vertices == tdim)
{
assert(parent_facet[cc * (tdim + 1) + fci] == -1);
// Child facet "fci" of cell cc, lies on parent facet "fpi"
parent_facet[cc * (tdim + 1) + fci] = fpi;
}
if (num_common_vertices == tdim)
{
assert(parent_facet[cc * (tdim + 1) + fci] == -1);
// Child facet "fci" of cell cc, lies on parent facet "fpi"
parent_facet[cc * (tdim + 1) + fci] = fpi;
}
}
}

return parent_facet;
}

return parent_facet;
}

/// Get the subdivision of an original simplex into smaller simplices,
Expand All @@ -127,15 +131,15 @@ compute_parent_facets(std::span<const std::int32_t> simplex_set)
/// @param[in] uniform Make a "uniform" subdivision with all triangles being
/// similar shape
/// @return
std::vector<std::int32_t>
std::pair<std::array<std::int32_t, 32>, std::size_t>
get_simplices(std::span<const std::int64_t> indices,
std::span<const std::int32_t> longest_edge, int tdim,
bool uniform);

/// Propagate edge markers according to rules (longest edge of each
/// face must be marked, if any edge of face is marked)
void enforce_rules(MPI_Comm comm, const graph::AdjacencyList<int>& shared_edges,
std::vector<std::int8_t>& marked_edges,
std::span<std::int8_t> marked_edges,
const mesh::Topology& topology,
std::span<const std::int32_t> long_edge);

Expand Down Expand Up @@ -169,7 +173,7 @@ face_long_edge(const mesh::Mesh<T>& mesh)

// Check mesh face quality (may be used in 2D to switch to "uniform"
// refinement)
const T min_ratio = sqrt(2.0) / 2.0;
const T min_ratio = std::sqrt(2.0) / 2.0;
if (tdim == 2)
edge_ratio_ok.resize(num_faces);

Expand Down Expand Up @@ -293,21 +297,20 @@ std::tuple<graph::AdjacencyList<std::int64_t>, std::vector<T>,
std::array<std::size_t, 2>, std::vector<std::int32_t>,
std::vector<std::int8_t>>
compute_refinement(MPI_Comm neighbor_comm,
const std::vector<std::int8_t>& marked_edges,
std::span<const std::int8_t> marked_edges,
const graph::AdjacencyList<int>& shared_edges,
const mesh::Mesh<T>& mesh,
const std::vector<std::int32_t>& long_edge,
const std::vector<std::int8_t>& edge_ratio_ok,
std::span<const std::int32_t> long_edge,
std::span<const std::int8_t> edge_ratio_ok,
plaza::Option option)
{
int tdim = mesh.topology()->dim();
int num_cell_edges = tdim * 3 - 3;
int num_cell_vertices = tdim + 1;
bool compute_facets = (option == plaza::Option::parent_facet
or option == plaza::Option::parent_cell_and_facet);
bool compute_parent_cell
= (option == plaza::Option::parent_cell
or option == plaza::Option::parent_cell_and_facet);
bool compute_facets = option == plaza::Option::parent_facet
or option == plaza::Option::parent_cell_and_facet;
bool compute_parent_cell = option == plaza::Option::parent_cell
or option == plaza::Option::parent_cell_and_facet;

// Make new vertices in parallel
const auto [new_vertex_map, new_vertex_coords, xshape]
Expand All @@ -334,7 +337,7 @@ compute_refinement(MPI_Comm neighbor_comm,
std::vector<std::int64_t> global_indices
= adjust_indices(*mesh.topology()->index_map(0), num_new_vertices_local);

const int num_cells = map_c->size_local();
const std::int32_t num_cells = map_c->size_local();

// Iterate over all cells, and refine if cell has a marked edge
std::vector<std::int64_t> cell_topology;
Expand Down Expand Up @@ -403,13 +406,13 @@ compute_refinement(MPI_Comm neighbor_comm,
}

const bool uniform = (tdim == 2) ? edge_ratio_ok[c] : false;

// FIXME: this has an expensive dynamic memory allocation
simplex_set = get_simplices(indices, longest_edge, tdim, uniform);
const auto [simplex_set_b, simplex_set_size]
= get_simplices(indices, longest_edge, tdim, uniform);
std::span<const std::int32_t> simplex_set(simplex_set_b.data(),
simplex_set_size);

// Save parent index
const std::int32_t ncells = simplex_set.size() / num_cell_vertices;

if (compute_parent_cell)
{
for (std::int32_t i = 0; i < ncells; ++i)
Expand All @@ -418,12 +421,18 @@ compute_refinement(MPI_Comm neighbor_comm,

if (compute_facets)
{
std::vector<std::int8_t> npf;
if (tdim == 3)
npf = compute_parent_facets<3>(simplex_set);
{
auto npf = compute_parent_facets<3>(simplex_set);
parent_facet.insert(parent_facet.end(), npf.begin(),
std::next(npf.begin(), simplex_set.size()));
}
else
npf = compute_parent_facets<2>(simplex_set);
parent_facet.insert(parent_facet.end(), npf.begin(), npf.end());
{
auto npf = compute_parent_facets<2>(simplex_set);
parent_facet.insert(parent_facet.end(), npf.begin(),
std::next(npf.begin(), simplex_set.size()));
}
}

// Convert from cell local index to mesh index and add to cells
Expand Down Expand Up @@ -688,5 +697,4 @@ compute_refinement_data(const mesh::Mesh<T>& mesh,
return {std::move(cell_adj), std::move(new_vertex_coords), xshape,
std::move(parent_cell), std::move(parent_facet)};
}

} // namespace dolfinx::refinement::plaza
Loading

0 comments on commit d5fb3db

Please sign in to comment.