UCP/PROTO: apply the rendezvous perf bias to the estimated perf factors

Descriptive preamble (text before the first file header is skipped by
git-apply and patch).

What the patch does, per file:

  src/ucp/proto/proto_perf.c
    * ucp_proto_perf_node_update_factor(): new helper that forwards one factor
      to ucp_proto_perf_node_update_data() unless ucs_linear_func_is_zero()
      reports it as zero within UCP_PROTO_PERF_EPSILON.
    * ucp_proto_perf_node_update_factors(): now loops over all factor ids and
      calls the helper above instead of open-coding the zero check.
    * ucp_proto_perf_segment_update_factor(): new helper that stores a factor
      in seg->perf_factors[factor_id] and forwards it to seg->node.
    * ucp_proto_perf_segment_add_funcs(): uses ucs_linear_func_add() plus
      ucp_proto_perf_segment_update_factor() per factor, replacing
      ucs_linear_func_add_inplace() and the trailing
      ucp_proto_perf_node_update_factors() call.
    * ucp_proto_perf_apply_func(): new public function; for every segment it
      replaces each factor with ucs_linear_func_compose(func, factor) and
      attaches a new data node (name, desc_fmt) to the segment node with
      ucp_proto_perf_node_own_child().

  src/ucp/proto/proto_perf.h
    * Declares and documents ucp_proto_perf_apply_func().

  src/ucp/rndv/proto_rndv.c
    * When fabs(params->perf_bias) > UCP_PROTO_PERF_EPSILON, calls
      ucp_proto_perf_apply_func() with
      ucs_linear_func_make(0.0, 1.0 - params->perf_bias) right before
      ucp_proto_select_add_proto().

  test/gtest/ucp/test_ucp_proto_mock.cc
    * Adds the zero_rndv_perf_diff test (RNDV_PERF_DIFF=0), reflows the
      UCS_TEST_P argument lists, and moves the rndv_2_lanes switch point from
      20300/20301 to 19883/19884.

Repairs made to this patch file itself:
    * Line structure restored; hunk line counts match every hunk header.
    * "&params->super.super" restored in the proto_rndv.c context line (the
      ampersand sequence had been mangled into a pilcrow character).
    * NOTE: indentation and column alignment inside the hunks were rebuilt by
      convention; if a context line does not match the target tree
      byte-for-byte, apply with whitespace-insensitive matching.

Review notes (assumptions to confirm, nothing below is changed by this file):
    * ucp_proto_perf_apply_func() hands its va_list "ap" to
      ucp_proto_perf_node_new_data(name, desc_fmt, ap). That prototype is not
      visible here; confirm it takes a va_list and is not itself variadic.
    * The node description is formatted with "%.2f %%" from the raw
      params->perf_bias; confirm whether the value is a fraction that should
      be scaled by 100 before being shown as a percentage.
    * ucp_proto_perf_node_update_factor() returns early for zero factors, so a
      factor that becomes zero is stored in the segment but not pushed to the
      perf node; confirm this is intended.

diff --git a/src/ucp/proto/proto_perf.c b/src/ucp/proto/proto_perf.c
index f946f1e341c..f6fd9729684 100644
--- a/src/ucp/proto/proto_perf.c
+++ b/src/ucp/proto/proto_perf.c
@@ -156,26 +156,42 @@ static ucs_status_t ucp_proto_perf_segment_split(const ucp_proto_perf_t *perf,
     return UCS_OK;
 }
 
+static void ucp_proto_perf_node_update_factor(ucp_proto_perf_node_t *perf_node,
+                                              const char *perf_factor_name,
+                                              ucs_linear_func_t perf_factor)
+{
+    if (ucs_linear_func_is_zero(perf_factor, UCP_PROTO_PERF_EPSILON)) {
+        return;
+    }
+
+    ucp_proto_perf_node_update_data(perf_node, perf_factor_name, perf_factor);
+}
+
 static void
 ucp_proto_perf_node_update_factors(ucp_proto_perf_node_t *perf_node,
                                    const ucp_proto_perf_factors_t perf_factors)
 {
     ucp_proto_perf_factor_id_t factor_id;
-    ucs_linear_func_t perf_factor;
 
     /* Add the functions to the segment and the performance node */
     for (factor_id = 0; factor_id < UCP_PROTO_PERF_FACTOR_LAST; ++factor_id) {
-        perf_factor = perf_factors[factor_id];
-        if (ucs_linear_func_is_zero(perf_factor, UCP_PROTO_PERF_EPSILON)) {
-            continue;
-        }
-
-        ucp_proto_perf_node_update_data(perf_node,
-                                        ucp_proto_perf_factor_names[factor_id],
-                                        perf_factors[factor_id]);
+        ucp_proto_perf_node_update_factor(perf_node,
+                                          ucp_proto_perf_factor_names[factor_id],
+                                          perf_factors[factor_id]);
     }
 }
 
+static void
+ucp_proto_perf_segment_update_factor(ucp_proto_perf_segment_t *seg,
+                                     ucp_proto_perf_factor_id_t factor_id,
+                                     ucs_linear_func_t perf_factor)
+{
+    seg->perf_factors[factor_id] = perf_factor;
+    ucp_proto_perf_node_update_factor(seg->node,
+                                      ucp_proto_perf_factor_names[factor_id],
+                                      perf_factor);
+}
+
 static void
 ucp_proto_perf_segment_add_funcs(ucp_proto_perf_t *perf,
                                  ucp_proto_perf_segment_t *seg,
@@ -190,11 +206,12 @@ ucp_proto_perf_segment_add_funcs(ucp_proto_perf_t *perf,
 
     /* Add the functions to the segment and the performance node */
     for (factor_id = 0; factor_id < UCP_PROTO_PERF_FACTOR_LAST; ++factor_id) {
-        ucs_linear_func_add_inplace(&seg->perf_factors[factor_id],
-                                    perf_factors[factor_id]);
+        ucp_proto_perf_segment_update_factor(
+                seg, factor_id,
+                ucs_linear_func_add(seg->perf_factors[factor_id],
+                                    perf_factors[factor_id]));
     }
 
-    ucp_proto_perf_node_update_factors(seg->node, seg->perf_factors);
     ucp_proto_perf_node_add_child(seg->node, perf_node);
 }
 
@@ -430,6 +447,31 @@ ucs_status_t ucp_proto_perf_aggregate2(const char *name,
     return ucp_proto_perf_aggregate(name, perf_elems, 2, perf_p);
 }
 
+void ucp_proto_perf_apply_func(ucp_proto_perf_t *perf, ucs_linear_func_t func,
+                               const char *name, const char *desc_fmt, ...)
+{
+    ucp_proto_perf_segment_t *seg;
+    ucp_proto_perf_factor_id_t factor_id;
+    va_list ap;
+    ucp_proto_perf_node_t *func_node;
+
+    ucp_proto_perf_segment_foreach(seg, perf) {
+        for (factor_id = 0; factor_id < UCP_PROTO_PERF_FACTOR_LAST;
+             ++factor_id) {
+            ucp_proto_perf_segment_update_factor(
+                    seg, factor_id,
+                    ucs_linear_func_compose(func,
+                                            seg->perf_factors[factor_id]));
+        }
+
+        va_start(ap, desc_fmt);
+        func_node = ucp_proto_perf_node_new_data(name, desc_fmt, ap);
+        va_end(ap);
+
+        ucp_proto_perf_node_own_child(seg->node, &func_node);
+    }
+}
+
 /* TODO:
  * Reconsider correctness of PPLN perf estimation logic since in case of async
  * operations it seems wrong to choose the longest factor without paying
diff --git a/src/ucp/proto/proto_perf.h b/src/ucp/proto/proto_perf.h
index 607382f87ed..24bed9f49d5 100644
--- a/src/ucp/proto/proto_perf.h
+++ b/src/ucp/proto/proto_perf.h
@@ -159,6 +159,21 @@ ucs_status_t ucp_proto_perf_aggregate2(const char *name,
                                        ucp_proto_perf_t **perf_p);
 
 
+/**
+ * Apply function to the performance factors of the given performance structure.
+ *
+ * @param [in] perf     Performance data structure to update.
+ * @param [in] func     Function to apply to the performance factors of the
+ *                      @a perf performance structure.
+ * @param [in] name     Name for the performance node that would be created
+ *                      to represent the impact of @a func.
+ * @param [in] desc_fmt Formatted description for the performance node that
+ *                      would be created to represent the impact of @a func.
+ */
+void ucp_proto_perf_apply_func(ucp_proto_perf_t *perf, ucs_linear_func_t func,
+                               const char *name, const char *desc_fmt, ...);
+
+
 /**
  * Expand given perf by estimation that all messages on interval
  * [end of @a frag_seg + 1, @a max_length] would be sent in a pipeline async
diff --git a/src/ucp/rndv/proto_rndv.c b/src/ucp/rndv/proto_rndv.c
index 3fe6f5ab20f..fc4b3f42f20 100644
--- a/src/ucp/rndv/proto_rndv.c
+++ b/src/ucp/rndv/proto_rndv.c
@@ -403,6 +403,13 @@ static void ucp_proto_rndv_ctrl_variant_probe(
         cfg_thresh = remote_proto->cfg_thresh;
     }
 
+    if (fabs(params->perf_bias) > UCP_PROTO_PERF_EPSILON) {
+        ucp_proto_perf_apply_func(perf,
+                                  ucs_linear_func_make(0.0,
+                                                       1.0 - params->perf_bias),
+                                  "bias", "%.2f %%", params->perf_bias);
+    }
+
     ucp_proto_select_add_proto(&params->super.super, cfg_thresh, cfg_priority,
                                perf, rpriv, priv_size);
 
diff --git a/test/gtest/ucp/test_ucp_proto_mock.cc b/test/gtest/ucp/test_ucp_proto_mock.cc
index b3a13651dba..f21239e7d89 100644
--- a/test/gtest/ucp/test_ucp_proto_mock.cc
+++ b/test/gtest/ucp/test_ucp_proto_mock.cc
@@ -366,8 +366,8 @@ class test_ucp_proto_mock_rcx : public test_ucp_proto_mock {
     }
 };
 
-UCS_TEST_P(test_ucp_proto_mock_rcx, rndv_1_lane,
-           "IB_NUM_PATHS?=1", "MAX_RNDV_LANES=1")
+UCS_TEST_P(test_ucp_proto_mock_rcx, rndv_1_lane, "IB_NUM_PATHS?=1",
+           "MAX_RNDV_LANES=1")
 {
     ucp_proto_select_key_t key = any_key();
     key.param.op_id_flags      = UCP_OP_ID_AM_SEND;
@@ -384,8 +384,25 @@ UCS_TEST_P(test_ucp_proto_mock_rcx, rndv_1_lane,
     }, key);
 }
 
-UCS_TEST_P(test_ucp_proto_mock_rcx, rndv_2_lanes,
-           "IB_NUM_PATHS?=2", "MAX_RNDV_LANES=2")
+UCS_TEST_P(test_ucp_proto_mock_rcx, zero_rndv_perf_diff, "IB_NUM_PATHS?=1",
+           "MAX_RNDV_LANES=1", "RNDV_PERF_DIFF=0")
+{
+    ucp_proto_select_key_t key = any_key();
+    key.param.op_id_flags      = UCP_OP_ID_AM_SEND;
+    key.param.op_attr          = 0;
+
+    check_ep_config(sender(), {
+        {0,     200,   "short",                "rc_mlx5/mock_1:1"},
+        {201,   6650,  "copy-in",              "rc_mlx5/mock_1:1"},
+        {6651,  8246,  "zero-copy",            "rc_mlx5/mock_1:1"},
+        {8247,  22502, "multi-frag zero-copy", "rc_mlx5/mock_1:1"},
+        {22503, INF,   "rendezvous zero-copy read from remote",
+                       "rc_mlx5/mock_0:1"},
+    }, key);
+}
+
+UCS_TEST_P(test_ucp_proto_mock_rcx, rndv_2_lanes, "IB_NUM_PATHS?=2",
+           "MAX_RNDV_LANES=2")
 {
     ucp_proto_select_key_t key = any_key();
     key.param.op_id_flags      = UCP_OP_ID_AM_SEND;
@@ -396,8 +413,8 @@ UCS_TEST_P(test_ucp_proto_mock_rcx, rndv_2_lanes,
         {0,     200,   "short",                "rc_mlx5/mock_1:1/path0"},
         {201,   6650,  "copy-in",              "rc_mlx5/mock_1:1/path0"},
         {6651,  8246,  "zero-copy",            "rc_mlx5/mock_1:1/path0"},
-        {8247,  20300, "multi-frag zero-copy", "rc_mlx5/mock_1:1/path0"},
-        {20301, INF,   "rendezvous zero-copy read from remote",
+        {8247,  19883, "multi-frag zero-copy", "rc_mlx5/mock_1:1/path0"},
+        {19884, INF,   "rendezvous zero-copy read from remote",
             "47% on rc_mlx5/mock_1:1/path0 and 53% on rc_mlx5/mock_0:1/path0"},
     }, key);
 }