From 4d003a0ce9a02129bb683b8e90b65aa9b4ae809d Mon Sep 17 00:00:00 2001 From: jnke2016 Date: Thu, 2 Jan 2025 16:16:03 -0800 Subject: [PATCH 01/20] relabel clusters ids --- cpp/src/community/leiden_impl.cuh | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/cpp/src/community/leiden_impl.cuh b/cpp/src/community/leiden_impl.cuh index c3600ff12e..503c5a1d4e 100644 --- a/cpp/src/community/leiden_impl.cuh +++ b/cpp/src/community/leiden_impl.cuh @@ -713,6 +713,25 @@ std::pair leiden( detail::flatten_leiden_dendrogram(handle, graph_view, *dendrogram, clustering); + // Get unique cluster id + size_t local_num_verts = (*dendrogram).get_level_size_nocheck(0); + rmm::device_uvector unique_cluster_ids(local_num_verts, handle.get_stream()); + + thrust::copy(handle.get_thrust_policy(), + clustering, + clustering + local_num_verts, + unique_cluster_ids.begin()); + + thrust::sort(handle.get_thrust_policy(), unique_cluster_ids.begin(), unique_cluster_ids.end()); + + unique_cluster_ids.resize( + thrust::distance( + unique_cluster_ids.begin(), + thrust::unique(handle.get_thrust_policy(), unique_cluster_ids.begin(), unique_cluster_ids.end())), + handle.get_stream()); + + detail::relabel_cluster_ids(handle, unique_cluster_ids, clustering, local_num_verts); + return std::make_pair(dendrogram->num_levels(), modularity); } From 3e06c06f000429695e11b6c6a1bc457da0df6542 Mon Sep 17 00:00:00 2001 From: jnke2016 Date: Thu, 2 Jan 2025 16:49:47 -0800 Subject: [PATCH 02/20] leiden expects unique seed per GPU --- python/cugraph/cugraph/dask/community/leiden.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/python/cugraph/cugraph/dask/community/leiden.py b/python/cugraph/cugraph/dask/community/leiden.py index bdcf9edc7b..beff0b59b5 100644 --- a/python/cugraph/cugraph/dask/community/leiden.py +++ b/python/cugraph/cugraph/dask/community/leiden.py @@ -156,13 +156,13 @@ def leiden( input_graph._plc_graph[w], max_iter, resolution, - random_state, + random_state + i, theta, do_expensive_check, workers=[w], allow_other_workers=False, ) - for w in Comms.get_workers() + for i, w in enumerate(Comms.get_workers()) ] wait(result) From 0675c73b05a7e1895feb2d18b31d153e47a70405 Mon Sep 17 00:00:00 2001 From: jnke2016 Date: Thu, 2 Jan 2025 17:33:29 -0800 Subject: [PATCH 03/20] properly relabel cluster ids in multi_gpu case --- cpp/src/community/leiden_impl.cuh | 35 ++++++++++++++++++++++++++++++- 1 file changed, 34 insertions(+), 1 deletion(-) diff --git a/cpp/src/community/leiden_impl.cuh b/cpp/src/community/leiden_impl.cuh index 503c5a1d4e..ecd3d6b853 100644 --- a/cpp/src/community/leiden_impl.cuh +++ b/cpp/src/community/leiden_impl.cuh @@ -729,8 +729,41 @@ std::pair leiden( unique_cluster_ids.begin(), thrust::unique(handle.get_thrust_policy(), unique_cluster_ids.begin(), unique_cluster_ids.end())), handle.get_stream()); + + if constexpr (multi_gpu) { + auto recvcounts = cugraph::host_scalar_allgather( + handle.get_comms(), unique_cluster_ids.size(), handle.get_stream()); + + std::vector displacements(recvcounts.size()); + std::exclusive_scan(recvcounts.begin(), recvcounts.end(), displacements.begin(), size_t{0}); + rmm::device_uvector allgathered_unique_cluster_ids(displacements.back() + recvcounts.back(), + handle.get_stream()); + cugraph::device_allgatherv(handle.get_comms(), + unique_cluster_ids.begin(), + allgathered_unique_cluster_ids.begin(), + recvcounts, + displacements, + handle.get_stream()); + + thrust::sort( + handle.get_thrust_policy(), + allgathered_unique_cluster_ids.begin(), + allgathered_unique_cluster_ids.end()); + + allgathered_unique_cluster_ids.resize( + thrust::distance( + allgathered_unique_cluster_ids.begin(), + thrust::unique(handle.get_thrust_policy(), + allgathered_unique_cluster_ids.begin(), + allgathered_unique_cluster_ids.end())), + handle.get_stream()); + + detail::relabel_cluster_ids(handle, allgathered_unique_cluster_ids, clustering, local_num_verts); + + } else { + detail::relabel_cluster_ids(handle, unique_cluster_ids, clustering, local_num_verts); + } - detail::relabel_cluster_ids(handle, unique_cluster_ids, clustering, local_num_verts); return std::make_pair(dendrogram->num_levels(), modularity); } From 2b1ac70272c2f9a08d688496c4ae88104acb2c62 Mon Sep 17 00:00:00 2001 From: jnke2016 Date: Thu, 2 Jan 2025 17:37:56 -0800 Subject: [PATCH 04/20] fix style --- cpp/src/community/leiden_impl.cuh | 63 +++++++++---------- .../cugraph/cugraph/dask/community/leiden.py | 2 +- 2 files changed, 32 insertions(+), 33 deletions(-) diff --git a/cpp/src/community/leiden_impl.cuh b/cpp/src/community/leiden_impl.cuh index ecd3d6b853..cdc119ad32 100644 --- a/cpp/src/community/leiden_impl.cuh +++ b/cpp/src/community/leiden_impl.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2024, NVIDIA CORPORATION. + * Copyright (c) 2022-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -718,53 +718,52 @@ std::pair leiden( rmm::device_uvector unique_cluster_ids(local_num_verts, handle.get_stream()); thrust::copy(handle.get_thrust_policy(), - clustering, - clustering + local_num_verts, - unique_cluster_ids.begin()); + clustering, + clustering + local_num_verts, + unique_cluster_ids.begin()); thrust::sort(handle.get_thrust_policy(), unique_cluster_ids.begin(), unique_cluster_ids.end()); - unique_cluster_ids.resize( - thrust::distance( - unique_cluster_ids.begin(), - thrust::unique(handle.get_thrust_policy(), unique_cluster_ids.begin(), unique_cluster_ids.end())), - handle.get_stream()); - + unique_cluster_ids.resize(thrust::distance(unique_cluster_ids.begin(), + thrust::unique(handle.get_thrust_policy(), + unique_cluster_ids.begin(), + unique_cluster_ids.end())), + handle.get_stream()); + if constexpr (multi_gpu) { auto recvcounts = cugraph::host_scalar_allgather( handle.get_comms(), unique_cluster_ids.size(), handle.get_stream()); std::vector displacements(recvcounts.size()); std::exclusive_scan(recvcounts.begin(), recvcounts.end(), displacements.begin(), size_t{0}); - rmm::device_uvector allgathered_unique_cluster_ids(displacements.back() + recvcounts.back(), - handle.get_stream()); + rmm::device_uvector allgathered_unique_cluster_ids( + displacements.back() + recvcounts.back(), handle.get_stream()); cugraph::device_allgatherv(handle.get_comms(), - unique_cluster_ids.begin(), - allgathered_unique_cluster_ids.begin(), - recvcounts, - displacements, - handle.get_stream()); - - thrust::sort( - handle.get_thrust_policy(), - allgathered_unique_cluster_ids.begin(), - allgathered_unique_cluster_ids.end()); + unique_cluster_ids.begin(), + allgathered_unique_cluster_ids.begin(), + recvcounts, + displacements, + handle.get_stream()); + + thrust::sort(handle.get_thrust_policy(), + allgathered_unique_cluster_ids.begin(), + allgathered_unique_cluster_ids.end()); allgathered_unique_cluster_ids.resize( - thrust::distance( - allgathered_unique_cluster_ids.begin(), - thrust::unique(handle.get_thrust_policy(), - allgathered_unique_cluster_ids.begin(), - allgathered_unique_cluster_ids.end())), - handle.get_stream()); - - detail::relabel_cluster_ids(handle, allgathered_unique_cluster_ids, clustering, local_num_verts); + thrust::distance(allgathered_unique_cluster_ids.begin(), + thrust::unique(handle.get_thrust_policy(), + allgathered_unique_cluster_ids.begin(), + allgathered_unique_cluster_ids.end())), + handle.get_stream()); + + detail::relabel_cluster_ids( + handle, allgathered_unique_cluster_ids, clustering, local_num_verts); } else { - detail::relabel_cluster_ids(handle, unique_cluster_ids, clustering, local_num_verts); + detail::relabel_cluster_ids( + handle, unique_cluster_ids, clustering, local_num_verts); } - return std::make_pair(dendrogram->num_levels(), modularity); } diff --git a/python/cugraph/cugraph/dask/community/leiden.py b/python/cugraph/cugraph/dask/community/leiden.py index beff0b59b5..1c327853c7 100644 --- a/python/cugraph/cugraph/dask/community/leiden.py +++ b/python/cugraph/cugraph/dask/community/leiden.py @@ -1,4 +1,4 @@ -# Copyright (c) 2022-2024, NVIDIA CORPORATION. +# Copyright (c) 2022-2025, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. From fdf09debc051eca16434447dfa351b1289c231dc Mon Sep 17 00:00:00 2001 From: jnke2016 Date: Tue, 7 Jan 2025 16:57:10 -0800 Subject: [PATCH 05/20] shuffle cluster IDs --- cpp/src/community/leiden_impl.cuh | 70 ++++++------------------------- 1 file changed, 12 insertions(+), 58 deletions(-) diff --git a/cpp/src/community/leiden_impl.cuh b/cpp/src/community/leiden_impl.cuh index cdc119ad32..558367b703 100644 --- a/cpp/src/community/leiden_impl.cuh +++ b/cpp/src/community/leiden_impl.cuh @@ -604,26 +604,19 @@ void relabel_cluster_ids(raft::handle_t const& handle, size_t num_nodes) { vertex_t local_cluster_id_first{0}; + + // Get unique cluster id and shuffle + remove_duplicates(handle, unique_cluster_ids); + if constexpr (multi_gpu) { - auto unique_cluster_range_lasts = cugraph::partition_manager::compute_partition_range_lasts( - handle, static_cast(unique_cluster_ids.size())); + auto cluster_ids_size_per_rank = cugraph::host_scalar_allgather( + handle.get_comms(), unique_cluster_ids.size(), handle.get_stream()); + std::vector cluster_ids_starts(cluster_ids_size_per_rank.size()); + std::exclusive_scan(cluster_ids_size_per_rank.begin(), cluster_ids_size_per_rank.end(), cluster_ids_starts.begin(), size_t{0}); auto& comm = handle.get_comms(); - auto const comm_size = comm.get_size(); - auto const comm_rank = comm.get_rank(); - auto& major_comm = handle.get_subcomm(cugraph::partition_manager::major_comm_name()); - auto const major_comm_size = major_comm.get_size(); - auto const major_comm_rank = major_comm.get_rank(); - auto& minor_comm = handle.get_subcomm(cugraph::partition_manager::minor_comm_name()); - auto const minor_comm_size = minor_comm.get_size(); - auto const minor_comm_rank = minor_comm.get_rank(); - - auto vertex_partition_id = - partition_manager::compute_vertex_partition_id_from_graph_subcomm_ranks( - major_comm_size, minor_comm_size, major_comm_rank, minor_comm_rank); - - local_cluster_id_first = - vertex_partition_id == 0 ? vertex_t{0} : unique_cluster_range_lasts[vertex_partition_id - 1]; + local_cluster_id_first = cluster_ids_starts[comm.get_rank()]; + } rmm::device_uvector numbering_indices(unique_cluster_ids.size(), handle.get_stream()); @@ -713,7 +706,6 @@ std::pair leiden( detail::flatten_leiden_dendrogram(handle, graph_view, *dendrogram, clustering); - // Get unique cluster id size_t local_num_verts = (*dendrogram).get_level_size_nocheck(0); rmm::device_uvector unique_cluster_ids(local_num_verts, handle.get_stream()); @@ -722,49 +714,11 @@ std::pair leiden( clustering + local_num_verts, unique_cluster_ids.begin()); - thrust::sort(handle.get_thrust_policy(), unique_cluster_ids.begin(), unique_cluster_ids.end()); - - unique_cluster_ids.resize(thrust::distance(unique_cluster_ids.begin(), - thrust::unique(handle.get_thrust_policy(), - unique_cluster_ids.begin(), - unique_cluster_ids.end())), - handle.get_stream()); - - if constexpr (multi_gpu) { - auto recvcounts = cugraph::host_scalar_allgather( - handle.get_comms(), unique_cluster_ids.size(), handle.get_stream()); - - std::vector displacements(recvcounts.size()); - std::exclusive_scan(recvcounts.begin(), recvcounts.end(), displacements.begin(), size_t{0}); - rmm::device_uvector allgathered_unique_cluster_ids( - displacements.back() + recvcounts.back(), handle.get_stream()); - cugraph::device_allgatherv(handle.get_comms(), - unique_cluster_ids.begin(), - allgathered_unique_cluster_ids.begin(), - recvcounts, - displacements, - handle.get_stream()); - - thrust::sort(handle.get_thrust_policy(), - allgathered_unique_cluster_ids.begin(), - allgathered_unique_cluster_ids.end()); - allgathered_unique_cluster_ids.resize( - thrust::distance(allgathered_unique_cluster_ids.begin(), - thrust::unique(handle.get_thrust_policy(), - allgathered_unique_cluster_ids.begin(), - allgathered_unique_cluster_ids.end())), - handle.get_stream()); - - detail::relabel_cluster_ids( - handle, allgathered_unique_cluster_ids, clustering, local_num_verts); - - } else { - detail::relabel_cluster_ids( + detail::relabel_cluster_ids( handle, unique_cluster_ids, clustering, local_num_verts); - } return std::make_pair(dendrogram->num_levels(), modularity); } -} // namespace cugraph +} // namespace cugraph \ No newline at end of file From ce22ea5ff1dc240ea100609d4c701c544bcc6fe5 Mon Sep 17 00:00:00 2001 From: jnke2016 Date: Tue, 7 Jan 2025 17:00:54 -0800 Subject: [PATCH 06/20] fix style --- cpp/src/community/leiden_impl.cuh | 15 ++++++++------- python/cugraph/cugraph/dask/community/leiden.py | 2 +- 2 files changed, 9 insertions(+), 8 deletions(-) diff --git a/cpp/src/community/leiden_impl.cuh b/cpp/src/community/leiden_impl.cuh index 558367b703..5a492d34a3 100644 --- a/cpp/src/community/leiden_impl.cuh +++ b/cpp/src/community/leiden_impl.cuh @@ -613,10 +613,12 @@ void relabel_cluster_ids(raft::handle_t const& handle, handle.get_comms(), unique_cluster_ids.size(), handle.get_stream()); std::vector cluster_ids_starts(cluster_ids_size_per_rank.size()); - std::exclusive_scan(cluster_ids_size_per_rank.begin(), cluster_ids_size_per_rank.end(), cluster_ids_starts.begin(), size_t{0}); - auto& comm = handle.get_comms(); - local_cluster_id_first = cluster_ids_starts[comm.get_rank()]; - + std::exclusive_scan(cluster_ids_size_per_rank.begin(), + cluster_ids_size_per_rank.end(), + cluster_ids_starts.begin(), + size_t{0}); + auto& comm = handle.get_comms(); + local_cluster_id_first = cluster_ids_starts[comm.get_rank()]; } rmm::device_uvector numbering_indices(unique_cluster_ids.size(), handle.get_stream()); @@ -714,11 +716,10 @@ std::pair leiden( clustering + local_num_verts, unique_cluster_ids.begin()); - detail::relabel_cluster_ids( - handle, unique_cluster_ids, clustering, local_num_verts); + handle, unique_cluster_ids, clustering, local_num_verts); return std::make_pair(dendrogram->num_levels(), modularity); } -} // namespace cugraph \ No newline at end of file +} // namespace cugraph diff --git a/python/cugraph/cugraph/dask/community/leiden.py b/python/cugraph/cugraph/dask/community/leiden.py index 1c327853c7..beff0b59b5 100644 --- a/python/cugraph/cugraph/dask/community/leiden.py +++ b/python/cugraph/cugraph/dask/community/leiden.py @@ -1,4 +1,4 @@ -# Copyright (c) 2022-2025, NVIDIA CORPORATION. +# Copyright (c) 2022-2024, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. From 649ea7f4903d73ed08e33b09694aeaafb0708772 Mon Sep 17 00:00:00 2001 From: jnke2016 Date: Wed, 8 Jan 2025 11:48:42 -0800 Subject: [PATCH 07/20] simplify code --- cpp/src/community/leiden_impl.cuh | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/cpp/src/community/leiden_impl.cuh b/cpp/src/community/leiden_impl.cuh index 5a492d34a3..0d2afc631c 100644 --- a/cpp/src/community/leiden_impl.cuh +++ b/cpp/src/community/leiden_impl.cuh @@ -617,8 +617,7 @@ void relabel_cluster_ids(raft::handle_t const& handle, cluster_ids_size_per_rank.end(), cluster_ids_starts.begin(), size_t{0}); - auto& comm = handle.get_comms(); - local_cluster_id_first = cluster_ids_starts[comm.get_rank()]; + local_cluster_id_first = cluster_ids_starts[handle.get_comms().get_rank()]; } rmm::device_uvector numbering_indices(unique_cluster_ids.size(), handle.get_stream()); From cfdba935fb9e63829f8b54736803fb6c72a2bf68 Mon Sep 17 00:00:00 2001 From: jnke2016 Date: Wed, 8 Jan 2025 13:15:57 -0800 Subject: [PATCH 08/20] add SG tests for Leiden's cluster IDs --- cpp/tests/community/leiden_test.cpp | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/cpp/tests/community/leiden_test.cpp b/cpp/tests/community/leiden_test.cpp index 5ce0903f72..bc58354bde 100644 --- a/cpp/tests/community/leiden_test.cpp +++ b/cpp/tests/community/leiden_test.cpp @@ -128,6 +128,21 @@ class Tests_Leiden : public ::testing::TestWithParam(handle, clustering_v); + + unique_clustering_v = cugraph::test::unique(handle, std::move(unique_clustering_v)); + + auto expected_unique_clustering_v = cugraph::test::sequence( + handle, unique_clustering_v.size(), size_t{1}, int32_t{0}); + + auto h_unique_clustering_v = cugraph::test::to_host(handle, unique_clustering_v); + auto h_expected_unique_clustering_v = cugraph::test::to_host(handle, expected_unique_clustering_v); + + ASSERT_TRUE(std::equal(h_unique_clustering_v.begin(), + h_unique_clustering_v.end(), + h_expected_unique_clustering_v.begin())) + << "Returned cluster IDs are not numbered consecutively"; } }; From 9e244884f249ac1736dbf100bee350e0f0813897 Mon Sep 17 00:00:00 2001 From: jnke2016 Date: Wed, 8 Jan 2025 16:11:09 -0800 Subject: [PATCH 09/20] add MG tests for Leiden's cluster IDs --- cpp/tests/community/mg_leiden_test.cpp | 45 ++++++++++++++++++++++++++ 1 file changed, 45 insertions(+) diff --git a/cpp/tests/community/mg_leiden_test.cpp b/cpp/tests/community/mg_leiden_test.cpp index 6949ac8d17..e22a2cb00e 100644 --- a/cpp/tests/community/mg_leiden_test.cpp +++ b/cpp/tests/community/mg_leiden_test.cpp @@ -175,6 +175,7 @@ class Tests_MGLeiden if (leiden_usecase.check_correctness_) { SCOPED_TRACE("compare modularity input"); + // FIXME: The dendrogram is unused compare_sg_results(*handle_, rng_state, mg_graph_view, @@ -184,6 +185,50 @@ class Tests_MGLeiden leiden_usecase.theta_, mg_modularity); } + + // Check numbering + vertex_t num_vertices = mg_graph_view.local_vertex_partition_range_size(); + rmm::device_uvector clustering_v(num_vertices, handle_->get_stream()); + cugraph::leiden( + *handle_, + rng_state, + mg_graph_view, + mg_edge_weight_view, + clustering_v.data(), + leiden_usecase.max_level_, + leiden_usecase.resolution_); + + // Ensure each rank has consecutive labels + + auto unique_clustering_v = cugraph::test::sort(*handle_, clustering_v); + + unique_clustering_v = cugraph::test::unique(*handle_, std::move(unique_clustering_v)); + + auto h_unique_clustering_v = cugraph::test::to_host(*handle_, unique_clustering_v); + + auto expected_unique_clustering_v = cugraph::test::sequence( + *handle_, unique_clustering_v.size(), size_t{1}, h_unique_clustering_v[0]); + + auto h_expected_unique_clustering_v = cugraph::test::to_host(*handle_, expected_unique_clustering_v); + + ASSERT_TRUE(std::equal(h_unique_clustering_v.begin(), + h_unique_clustering_v.end(), + h_expected_unique_clustering_v.begin())) + << "Returned cluster IDs are not numbered consecutively in each rank"; + + // Check if cluster IDs are globally numbered consecutively + auto cluster_ids_size_per_rank = cugraph::host_scalar_allgather( + handle_->get_comms(), h_unique_clustering_v.size(), handle_->get_stream()); + + assert( + h_unique_clustering_v.back() == (cluster_ids_size_per_rank[handle_->get_comms().get_rank - 1])); + + // Necessary condition for the culster IDs to be globally numbered consecutively and coupled with + // the first check, it is sufficient. + EXPECT_EQ( + h_unique_clustering_v.back(), + cluster_ids_size_per_rank[handle_->get_comms().get_rank()] - 1) + << "Returned cluster IDs are not globally numbered consecutively"; } private: From 2f3a7370549fee3e71914f1fa684ee742cbde19b Mon Sep 17 00:00:00 2001 From: jnke2016 Date: Wed, 8 Jan 2025 16:12:56 -0800 Subject: [PATCH 10/20] fix style --- cpp/tests/community/leiden_test.cpp | 13 ++++----- cpp/tests/community/mg_leiden_test.cpp | 37 +++++++++++++------------- 2 files changed, 25 insertions(+), 25 deletions(-) diff --git a/cpp/tests/community/leiden_test.cpp b/cpp/tests/community/leiden_test.cpp index bc58354bde..fc92709ac0 100644 --- a/cpp/tests/community/leiden_test.cpp +++ b/cpp/tests/community/leiden_test.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023-2024, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2023-2025, NVIDIA CORPORATION. All rights reserved. * * NVIDIA CORPORATION and its licensors retain all intellectual property * and proprietary rights in and to this software, related documentation @@ -128,16 +128,17 @@ class Tests_Leiden : public ::testing::TestWithParam(handle, clustering_v); unique_clustering_v = cugraph::test::unique(handle, std::move(unique_clustering_v)); - auto expected_unique_clustering_v = cugraph::test::sequence( - handle, unique_clustering_v.size(), size_t{1}, int32_t{0}); - + auto expected_unique_clustering_v = + cugraph::test::sequence(handle, unique_clustering_v.size(), size_t{1}, int32_t{0}); + auto h_unique_clustering_v = cugraph::test::to_host(handle, unique_clustering_v); - auto h_expected_unique_clustering_v = cugraph::test::to_host(handle, expected_unique_clustering_v); + auto h_expected_unique_clustering_v = + cugraph::test::to_host(handle, expected_unique_clustering_v); ASSERT_TRUE(std::equal(h_unique_clustering_v.begin(), h_unique_clustering_v.end(), diff --git a/cpp/tests/community/mg_leiden_test.cpp b/cpp/tests/community/mg_leiden_test.cpp index e22a2cb00e..bef32e711e 100644 --- a/cpp/tests/community/mg_leiden_test.cpp +++ b/cpp/tests/community/mg_leiden_test.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2024, NVIDIA CORPORATION. + * Copyright (c) 2021-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -189,15 +189,14 @@ class Tests_MGLeiden // Check numbering vertex_t num_vertices = mg_graph_view.local_vertex_partition_range_size(); rmm::device_uvector clustering_v(num_vertices, handle_->get_stream()); - cugraph::leiden( - *handle_, - rng_state, - mg_graph_view, - mg_edge_weight_view, - clustering_v.data(), - leiden_usecase.max_level_, - leiden_usecase.resolution_); - + cugraph::leiden(*handle_, + rng_state, + mg_graph_view, + mg_edge_weight_view, + clustering_v.data(), + leiden_usecase.max_level_, + leiden_usecase.resolution_); + // Ensure each rank has consecutive labels auto unique_clustering_v = cugraph::test::sort(*handle_, clustering_v); @@ -208,8 +207,9 @@ class Tests_MGLeiden auto expected_unique_clustering_v = cugraph::test::sequence( *handle_, unique_clustering_v.size(), size_t{1}, h_unique_clustering_v[0]); - - auto h_expected_unique_clustering_v = cugraph::test::to_host(*handle_, expected_unique_clustering_v); + + auto h_expected_unique_clustering_v = + cugraph::test::to_host(*handle_, expected_unique_clustering_v); ASSERT_TRUE(std::equal(h_unique_clustering_v.begin(), h_unique_clustering_v.end(), @@ -220,14 +220,13 @@ class Tests_MGLeiden auto cluster_ids_size_per_rank = cugraph::host_scalar_allgather( handle_->get_comms(), h_unique_clustering_v.size(), handle_->get_stream()); - assert( - h_unique_clustering_v.back() == (cluster_ids_size_per_rank[handle_->get_comms().get_rank - 1])); + assert(h_unique_clustering_v.back() == + (cluster_ids_size_per_rank[handle_->get_comms().get_rank - 1])); - // Necessary condition for the culster IDs to be globally numbered consecutively and coupled with - // the first check, it is sufficient. - EXPECT_EQ( - h_unique_clustering_v.back(), - cluster_ids_size_per_rank[handle_->get_comms().get_rank()] - 1) + // Necessary condition for the culster IDs to be globally numbered consecutively and coupled + // with the first check, it is sufficient. + EXPECT_EQ(h_unique_clustering_v.back(), + cluster_ids_size_per_rank[handle_->get_comms().get_rank()] - 1) << "Returned cluster IDs are not globally numbered consecutively"; } From 9489ea01c9291d1ee8c813ac249ea9334d21b5d7 Mon Sep 17 00:00:00 2001 From: jnke2016 Date: Thu, 9 Jan 2025 15:10:28 -0800 Subject: [PATCH 11/20] include utility functions --- cpp/tests/community/leiden_test.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/cpp/tests/community/leiden_test.cpp b/cpp/tests/community/leiden_test.cpp index fc92709ac0..db2828dfdd 100644 --- a/cpp/tests/community/leiden_test.cpp +++ b/cpp/tests/community/leiden_test.cpp @@ -10,6 +10,7 @@ */ #include "utilities/base_fixture.hpp" #include "utilities/test_graphs.hpp" +#include "utilities/conversion_utilities.hpp" #include #include From 1ffe352934b1365dbde7231015d82ab08ba2dcba Mon Sep 17 00:00:00 2001 From: jnke2016 Date: Thu, 9 Jan 2025 15:11:25 -0800 Subject: [PATCH 12/20] update mg tests --- cpp/tests/community/mg_leiden_test.cpp | 28 +++++++++++--------------- 1 file changed, 12 insertions(+), 16 deletions(-) diff --git a/cpp/tests/community/mg_leiden_test.cpp b/cpp/tests/community/mg_leiden_test.cpp index bef32e711e..81df26b68e 100644 --- a/cpp/tests/community/mg_leiden_test.cpp +++ b/cpp/tests/community/mg_leiden_test.cpp @@ -197,12 +197,17 @@ class Tests_MGLeiden leiden_usecase.max_level_, leiden_usecase.resolution_); - // Ensure each rank has consecutive labels - auto unique_clustering_v = cugraph::test::sort(*handle_, clustering_v); unique_clustering_v = cugraph::test::unique(*handle_, std::move(unique_clustering_v)); + unique_clustering_v = + cugraph::test::device_allgatherv(*handle_, unique_clustering_v.data(), unique_clustering_v.size()); + + unique_clustering_v = cugraph::test::sort(*handle_, unique_clustering_v); + + unique_clustering_v = cugraph::test::unique(*handle_, std::move(unique_clustering_v)); + auto h_unique_clustering_v = cugraph::test::to_host(*handle_, unique_clustering_v); auto expected_unique_clustering_v = cugraph::test::sequence( @@ -210,24 +215,13 @@ class Tests_MGLeiden auto h_expected_unique_clustering_v = cugraph::test::to_host(*handle_, expected_unique_clustering_v); + + ASSERT_TRUE(std::equal(h_unique_clustering_v.begin(), h_unique_clustering_v.end(), h_expected_unique_clustering_v.begin())) - << "Returned cluster IDs are not numbered consecutively in each rank"; - - // Check if cluster IDs are globally numbered consecutively - auto cluster_ids_size_per_rank = cugraph::host_scalar_allgather( - handle_->get_comms(), h_unique_clustering_v.size(), handle_->get_stream()); - - assert(h_unique_clustering_v.back() == - (cluster_ids_size_per_rank[handle_->get_comms().get_rank - 1])); - - // Necessary condition for the culster IDs to be globally numbered consecutively and coupled - // with the first check, it is sufficient. - EXPECT_EQ(h_unique_clustering_v.back(), - cluster_ids_size_per_rank[handle_->get_comms().get_rank()] - 1) - << "Returned cluster IDs are not globally numbered consecutively"; + << "Returned cluster IDs are not numbered consecutively"; } private: @@ -272,6 +266,7 @@ INSTANTIATE_TEST_SUITE_P( ::testing::Values(Leiden_Usecase{100, 1, 1, false}), ::testing::Values(cugraph::test::File_Usecase("test/datasets/karate.mtx")))); +//#if 0 INSTANTIATE_TEST_SUITE_P(rmat_small_tests, Tests_MGLeiden_Rmat, ::testing::Combine(::testing::Values(Leiden_Usecase{100, 1, false}), @@ -301,5 +296,6 @@ INSTANTIATE_TEST_SUITE_P( // disable correctness checks for large graphs ::testing::Values(Leiden_Usecase{100, 1, 1, false}), ::testing::Values(cugraph::test::Rmat_Usecase(12, 32, 0.57, 0.19, 0.19, 0, true, false)))); +//#endif CUGRAPH_MG_TEST_PROGRAM_MAIN() From c70a229c4b532e05db3e8fada06eab5015bdb34a Mon Sep 17 00:00:00 2001 From: jnke2016 Date: Thu, 9 Jan 2025 15:14:01 -0800 Subject: [PATCH 13/20] fix style --- cpp/tests/community/leiden_test.cpp | 2 +- cpp/tests/community/mg_leiden_test.cpp | 12 +++++------- 2 files changed, 6 insertions(+), 8 deletions(-) diff --git a/cpp/tests/community/leiden_test.cpp b/cpp/tests/community/leiden_test.cpp index db2828dfdd..ad2be59eee 100644 --- a/cpp/tests/community/leiden_test.cpp +++ b/cpp/tests/community/leiden_test.cpp @@ -9,8 +9,8 @@ * */ #include "utilities/base_fixture.hpp" -#include "utilities/test_graphs.hpp" #include "utilities/conversion_utilities.hpp" +#include "utilities/test_graphs.hpp" #include #include diff --git a/cpp/tests/community/mg_leiden_test.cpp b/cpp/tests/community/mg_leiden_test.cpp index 81df26b68e..a7d1321237 100644 --- a/cpp/tests/community/mg_leiden_test.cpp +++ b/cpp/tests/community/mg_leiden_test.cpp @@ -201,9 +201,9 @@ class Tests_MGLeiden unique_clustering_v = cugraph::test::unique(*handle_, std::move(unique_clustering_v)); - unique_clustering_v = - cugraph::test::device_allgatherv(*handle_, unique_clustering_v.data(), unique_clustering_v.size()); - + unique_clustering_v = cugraph::test::device_allgatherv( + *handle_, unique_clustering_v.data(), unique_clustering_v.size()); + unique_clustering_v = cugraph::test::sort(*handle_, unique_clustering_v); unique_clustering_v = cugraph::test::unique(*handle_, std::move(unique_clustering_v)); @@ -215,8 +215,6 @@ class Tests_MGLeiden auto h_expected_unique_clustering_v = cugraph::test::to_host(*handle_, expected_unique_clustering_v); - - ASSERT_TRUE(std::equal(h_unique_clustering_v.begin(), h_unique_clustering_v.end(), @@ -266,7 +264,7 @@ INSTANTIATE_TEST_SUITE_P( ::testing::Values(Leiden_Usecase{100, 1, 1, false}), ::testing::Values(cugraph::test::File_Usecase("test/datasets/karate.mtx")))); -//#if 0 +// #if 0 INSTANTIATE_TEST_SUITE_P(rmat_small_tests, Tests_MGLeiden_Rmat, ::testing::Combine(::testing::Values(Leiden_Usecase{100, 1, false}), @@ -296,6 +294,6 @@ INSTANTIATE_TEST_SUITE_P( // disable correctness checks for large graphs ::testing::Values(Leiden_Usecase{100, 1, 1, false}), ::testing::Values(cugraph::test::Rmat_Usecase(12, 32, 0.57, 0.19, 0.19, 0, true, false)))); -//#endif +// #endif CUGRAPH_MG_TEST_PROGRAM_MAIN() From 66bd69da3f947a4cd4a7ccdd991d4e50c50673ee Mon Sep 17 00:00:00 2001 From: jnke2016 Date: Thu, 9 Jan 2025 15:20:00 -0800 Subject: [PATCH 14/20] removed unused code --- cpp/tests/community/mg_leiden_test.cpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/cpp/tests/community/mg_leiden_test.cpp b/cpp/tests/community/mg_leiden_test.cpp index a7d1321237..081c5cb6df 100644 --- a/cpp/tests/community/mg_leiden_test.cpp +++ b/cpp/tests/community/mg_leiden_test.cpp @@ -264,7 +264,6 @@ INSTANTIATE_TEST_SUITE_P( ::testing::Values(Leiden_Usecase{100, 1, 1, false}), ::testing::Values(cugraph::test::File_Usecase("test/datasets/karate.mtx")))); -// #if 0 INSTANTIATE_TEST_SUITE_P(rmat_small_tests, Tests_MGLeiden_Rmat, ::testing::Combine(::testing::Values(Leiden_Usecase{100, 1, false}), @@ -294,6 +293,5 @@ INSTANTIATE_TEST_SUITE_P( // disable correctness checks for large graphs ::testing::Values(Leiden_Usecase{100, 1, 1, false}), ::testing::Values(cugraph::test::Rmat_Usecase(12, 32, 0.57, 0.19, 0.19, 0, true, false)))); -// #endif CUGRAPH_MG_TEST_PROGRAM_MAIN() From 485ab35752d24b4c9233584168c706e5e9562c47 Mon Sep 17 00:00:00 2001 From: jnke2016 Date: Thu, 9 Jan 2025 16:06:38 -0800 Subject: [PATCH 15/20] fix copyright --- python/cugraph/cugraph/community/leiden.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/cugraph/cugraph/community/leiden.py b/python/cugraph/cugraph/community/leiden.py index 6abedcac95..4895d0e839 100644 --- a/python/cugraph/cugraph/community/leiden.py +++ b/python/cugraph/cugraph/community/leiden.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2024, NVIDIA CORPORATION. +# Copyright (c) 2019-2025, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at From d1d3687473f7e79f06d3f288368cadeb8778a6d4 Mon Sep 17 00:00:00 2001 From: jnke2016 Date: Thu, 9 Jan 2025 16:12:07 -0800 Subject: [PATCH 16/20] fix copyright --- python/cugraph/cugraph/dask/community/leiden.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/cugraph/cugraph/dask/community/leiden.py b/python/cugraph/cugraph/dask/community/leiden.py index beff0b59b5..1c327853c7 100644 --- a/python/cugraph/cugraph/dask/community/leiden.py +++ b/python/cugraph/cugraph/dask/community/leiden.py @@ -1,4 +1,4 @@ -# Copyright (c) 2022-2024, NVIDIA CORPORATION. +# Copyright (c) 2022-2025, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. From 2f00a89bb9f3ca1a5ae48ae746e2c928bd358bd6 Mon Sep 17 00:00:00 2001 From: jnke2016 Date: Thu, 9 Jan 2025 16:21:28 -0800 Subject: [PATCH 17/20] update copyright --- python/cugraph/cugraph/community/leiden.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/cugraph/cugraph/community/leiden.py b/python/cugraph/cugraph/community/leiden.py index 4895d0e839..6abedcac95 100644 --- a/python/cugraph/cugraph/community/leiden.py +++ b/python/cugraph/cugraph/community/leiden.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2025, NVIDIA CORPORATION. +# Copyright (c) 2019-2024, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at From c9e2781cf274a67e636143325ed07260203c698f Mon Sep 17 00:00:00 2001 From: jnke2016 Date: Fri, 10 Jan 2025 11:04:38 -0800 Subject: [PATCH 18/20] handle case when random_state has default value --- python/cugraph/cugraph/dask/community/leiden.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/cugraph/cugraph/dask/community/leiden.py b/python/cugraph/cugraph/dask/community/leiden.py index 1c327853c7..adcb278928 100644 --- a/python/cugraph/cugraph/dask/community/leiden.py +++ b/python/cugraph/cugraph/dask/community/leiden.py @@ -156,7 +156,7 @@ def leiden( input_graph._plc_graph[w], max_iter, resolution, - random_state + i, + (random_state + i) if random_state is not None else random_state, theta, do_expensive_check, workers=[w], From c9945cd752d121872360d61fcb5e808f392aa0a5 Mon Sep 17 00:00:00 2001 From: jnke2016 Date: Fri, 10 Jan 2025 11:05:47 -0800 Subject: [PATCH 19/20] add python tests for Leiden numbering --- .../cugraph/tests/community/test_leiden.py | 21 ++++++++++++++++++- .../cugraph/tests/community/test_leiden_mg.py | 13 +++++++++++- 2 files changed, 32 insertions(+), 2 deletions(-) diff --git a/python/cugraph/cugraph/tests/community/test_leiden.py b/python/cugraph/cugraph/tests/community/test_leiden.py index 48300b2201..4750cb4c69 100644 --- a/python/cugraph/cugraph/tests/community/test_leiden.py +++ b/python/cugraph/cugraph/tests/community/test_leiden.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2024, NVIDIA CORPORATION. +# Copyright (c) 2019-2025, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -19,6 +19,7 @@ import cugraph import cudf +from cudf.testing.testing import assert_series_equal from cugraph.testing import utils, UNDIRECTED_DATASETS from cugraph.datasets import karate_asymmetric @@ -185,6 +186,15 @@ def test_leiden(graph_file): leiden_parts, leiden_mod = cugraph_leiden(G) louvain_parts, louvain_mod = cugraph_louvain(G) + unique_parts = leiden_parts["partition"].drop_duplicates().sort_values( + ascending=True).reset_index(drop=True) + + idx_col = cudf.Series(unique_parts.index) + + # Ensure Leiden cluster's ID are numbered consecutively + assert_series_equal( + unique_parts, idx_col, check_dtype=False, check_names=False) + # Leiden modularity score is smaller than Louvain's assert leiden_mod >= (0.75 * louvain_mod) @@ -202,6 +212,15 @@ def test_leiden_nx(graph_file): leiden_parts, leiden_mod = cugraph_leiden(G) louvain_parts, louvain_mod = cugraph_louvain(G) + unique_parts = cudf.Series(leiden_parts.values()).drop_duplicates().sort_values( + ascending=True).reset_index(drop=True) + + idx_col = cudf.Series(unique_parts.index) + + # Ensure Leiden cluster's ID are numbered consecutively + assert_series_equal( + unique_parts, idx_col, check_dtype=False, check_names=False) + # Calculating modularity scores for comparison # Leiden modularity score is smaller than Louvain's assert leiden_mod >= (0.75 * louvain_mod) diff --git a/python/cugraph/cugraph/tests/community/test_leiden_mg.py b/python/cugraph/cugraph/tests/community/test_leiden_mg.py index 2904ecd12a..fdcec19ad2 100644 --- a/python/cugraph/cugraph/tests/community/test_leiden_mg.py +++ b/python/cugraph/cugraph/tests/community/test_leiden_mg.py @@ -1,4 +1,4 @@ -# Copyright (c) 2020-2024, NVIDIA CORPORATION. +# Copyright (c) 2020-2025, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -16,6 +16,8 @@ import cugraph import cugraph.dask as dcg from cugraph.datasets import karate_asymmetric, karate, dolphins +import cudf +from cudf.testing.testing import assert_series_equal # ============================================================================= @@ -64,6 +66,15 @@ def test_mg_leiden_with_edgevals_undirected_graph(dask_client, dataset): dg = get_mg_graph(dataset, directed=False) parts, mod = dcg.leiden(dg) + unique_parts = parts["partition"].compute().drop_duplicates().sort_values( + ascending=True).reset_index(drop=True) + + idx_col = cudf.Series(unique_parts.index) + + # Ensure Leiden cluster's ID are numbered consecutively + assert_series_equal( + unique_parts, idx_col, check_dtype=False, check_names=False) + # FIXME: either call Nx with the same dataset and compare results, or # hardcode golden results to compare to. print() From f4d811a1c257262c9f0574f7f42cab9310320e58 Mon Sep 17 00:00:00 2001 From: jnke2016 Date: Fri, 10 Jan 2025 11:07:12 -0800 Subject: [PATCH 20/20] fix style --- .../cugraph/tests/community/test_leiden.py | 26 ++++++++++++------- .../cugraph/tests/community/test_leiden_mg.py | 14 ++++++---- 2 files changed, 25 insertions(+), 15 deletions(-) diff --git a/python/cugraph/cugraph/tests/community/test_leiden.py b/python/cugraph/cugraph/tests/community/test_leiden.py index 4750cb4c69..04ed855adb 100644 --- a/python/cugraph/cugraph/tests/community/test_leiden.py +++ b/python/cugraph/cugraph/tests/community/test_leiden.py @@ -186,14 +186,17 @@ def test_leiden(graph_file): leiden_parts, leiden_mod = cugraph_leiden(G) louvain_parts, louvain_mod = cugraph_louvain(G) - unique_parts = leiden_parts["partition"].drop_duplicates().sort_values( - ascending=True).reset_index(drop=True) - + unique_parts = ( + leiden_parts["partition"] + .drop_duplicates() + .sort_values(ascending=True) + .reset_index(drop=True) + ) + idx_col = cudf.Series(unique_parts.index) # Ensure Leiden cluster's ID are numbered consecutively - assert_series_equal( - unique_parts, idx_col, check_dtype=False, check_names=False) + assert_series_equal(unique_parts, idx_col, check_dtype=False, check_names=False) # Leiden modularity score is smaller than Louvain's assert leiden_mod >= (0.75 * louvain_mod) @@ -212,14 +215,17 @@ def test_leiden_nx(graph_file): leiden_parts, leiden_mod = cugraph_leiden(G) louvain_parts, louvain_mod = cugraph_louvain(G) - unique_parts = cudf.Series(leiden_parts.values()).drop_duplicates().sort_values( - ascending=True).reset_index(drop=True) - + unique_parts = ( + cudf.Series(leiden_parts.values()) + .drop_duplicates() + .sort_values(ascending=True) + .reset_index(drop=True) + ) + idx_col = cudf.Series(unique_parts.index) # Ensure Leiden cluster's ID are numbered consecutively - assert_series_equal( - unique_parts, idx_col, check_dtype=False, check_names=False) + assert_series_equal(unique_parts, idx_col, check_dtype=False, check_names=False) # Calculating modularity scores for comparison # Leiden modularity score is smaller than Louvain's diff --git a/python/cugraph/cugraph/tests/community/test_leiden_mg.py b/python/cugraph/cugraph/tests/community/test_leiden_mg.py index fdcec19ad2..4f6fee029d 100644 --- a/python/cugraph/cugraph/tests/community/test_leiden_mg.py +++ b/python/cugraph/cugraph/tests/community/test_leiden_mg.py @@ -66,14 +66,18 @@ def test_mg_leiden_with_edgevals_undirected_graph(dask_client, dataset): dg = get_mg_graph(dataset, directed=False) parts, mod = dcg.leiden(dg) - unique_parts = parts["partition"].compute().drop_duplicates().sort_values( - ascending=True).reset_index(drop=True) - + unique_parts = ( + parts["partition"] + .compute() + .drop_duplicates() + .sort_values(ascending=True) + .reset_index(drop=True) + ) + idx_col = cudf.Series(unique_parts.index) # Ensure Leiden cluster's ID are numbered consecutively - assert_series_equal( - unique_parts, idx_col, check_dtype=False, check_names=False) + assert_series_equal(unique_parts, idx_col, check_dtype=False, check_names=False) # FIXME: either call Nx with the same dataset and compare results, or # hardcode golden results to compare to.