Skip to content

Commit

Permalink
UCP/PROTO: Consider RNDV_PERF_DIFF
Browse files Browse the repository at this point in the history
  • Loading branch information
rakhmets committed Jan 22, 2025
1 parent 3a9f02a commit 5653bb2
Show file tree
Hide file tree
Showing 4 changed files with 99 additions and 18 deletions.
66 changes: 54 additions & 12 deletions src/ucp/proto/proto_perf.c
Original file line number Diff line number Diff line change
Expand Up @@ -156,26 +156,42 @@ static ucs_status_t ucp_proto_perf_segment_split(const ucp_proto_perf_t *perf,
return UCS_OK;
}

/**
 * Store one performance factor in a performance node.
 *
 * A factor that is zero within UCP_PROTO_PERF_EPSILON is not stored, and the
 * node is left untouched.
 *
 * @param [in] perf_node         Performance node to update.
 * @param [in] perf_factor_name  Name passed to the node for this factor.
 * @param [in] perf_factor       Linear function describing the factor.
 */
static void ucp_proto_perf_node_update_factor(ucp_proto_perf_node_t *perf_node,
                                              const char *perf_factor_name,
                                              ucs_linear_func_t perf_factor)
{
    if (!ucs_linear_func_is_zero(perf_factor, UCP_PROTO_PERF_EPSILON)) {
        ucp_proto_perf_node_update_data(perf_node, perf_factor_name,
                                        perf_factor);
    }
}

/**
 * Store every performance factor of @a perf_factors in a performance node.
 *
 * @param [in] perf_node     Performance node to update.
 * @param [in] perf_factors  Array of linear functions, indexed by factor id.
 */
static void
ucp_proto_perf_node_update_factors(ucp_proto_perf_node_t *perf_node,
                                   const ucp_proto_perf_factors_t perf_factors)
{
    ucp_proto_perf_factor_id_t factor_id;

    /* Zero factors are filtered out by ucp_proto_perf_node_update_factor(),
     * so each factor is checked and stored exactly once */
    for (factor_id = 0; factor_id < UCP_PROTO_PERF_FACTOR_LAST; ++factor_id) {
        ucp_proto_perf_node_update_factor(perf_node,
                                          ucp_proto_perf_factor_names[factor_id],
                                          perf_factors[factor_id]);
    }
}

/**
 * Set one performance factor of a segment and pass the same value on to the
 * segment's performance node.
 *
 * @param [in] seg          Segment whose factor is replaced.
 * @param [in] factor_id    Index of the factor to replace.
 * @param [in] perf_factor  New value of the factor.
 */
static void
ucp_proto_perf_segment_update_factor(ucp_proto_perf_segment_t *seg,
                                     ucp_proto_perf_factor_id_t factor_id,
                                     ucs_linear_func_t perf_factor)
{
    const char *factor_name = ucp_proto_perf_factor_names[factor_id];

    seg->perf_factors[factor_id] = perf_factor;
    ucp_proto_perf_node_update_factor(seg->node, factor_name, perf_factor);
}

static void
ucp_proto_perf_segment_add_funcs(ucp_proto_perf_t *perf,
ucp_proto_perf_segment_t *seg,
Expand All @@ -190,11 +206,12 @@ ucp_proto_perf_segment_add_funcs(ucp_proto_perf_t *perf,

/* Add the functions to the segment and the performance node */
for (factor_id = 0; factor_id < UCP_PROTO_PERF_FACTOR_LAST; ++factor_id) {
ucs_linear_func_add_inplace(&seg->perf_factors[factor_id],
perf_factors[factor_id]);
ucp_proto_perf_segment_update_factor(
seg, factor_id,
ucs_linear_func_add(seg->perf_factors[factor_id],
perf_factors[factor_id]));
}

ucp_proto_perf_node_update_factors(seg->node, seg->perf_factors);
ucp_proto_perf_node_add_child(seg->node, perf_node);
}

Expand Down Expand Up @@ -430,6 +447,31 @@ ucs_status_t ucp_proto_perf_aggregate2(const char *name,
return ucp_proto_perf_aggregate(name, perf_elems, 2, perf_p);
}

/*
 * For every segment of @a perf: replace each performance factor by @a func
 * composed with it, then attach a new data node (named @a name, described by
 * @a desc_fmt and its arguments) as a child of the segment's node.
 */
void ucp_proto_perf_apply_func(ucp_proto_perf_t *perf, ucs_linear_func_t func,
                               const char *name, const char *desc_fmt, ...)
{
    ucp_proto_perf_factor_id_t factor_id;
    ucp_proto_perf_node_t *child_node;
    ucp_proto_perf_segment_t *seg;
    ucs_linear_func_t composed;
    va_list args;

    ucp_proto_perf_segment_foreach(seg, perf) {
        for (factor_id = 0; factor_id < UCP_PROTO_PERF_FACTOR_LAST;
             ++factor_id) {
            composed = ucs_linear_func_compose(func,
                                               seg->perf_factors[factor_id]);
            ucp_proto_perf_segment_update_factor(seg, factor_id, composed);
        }

        /* A separate node is created per segment, so the argument list is
         * restarted on every iteration */
        va_start(args, desc_fmt);
        /* NOTE(review): a va_list is handed over here; confirm that
         * ucp_proto_perf_node_new_data() takes a va_list and is not itself
         * declared with a variadic "..." parameter */
        child_node = ucp_proto_perf_node_new_data(name, desc_fmt, args);
        va_end(args);

        ucp_proto_perf_node_own_child(seg->node, &child_node);
    }
}

/* TODO:
* Reconsider correctness of PPLN perf estimation logic since in case of async
* operations it seems wrong to choose the longest factor without paying
Expand Down
15 changes: 15 additions & 0 deletions src/ucp/proto/proto_perf.h
Original file line number Diff line number Diff line change
Expand Up @@ -159,6 +159,21 @@ ucs_status_t ucp_proto_perf_aggregate2(const char *name,
ucp_proto_perf_t **perf_p);


/**
 * Apply a function to the performance factors of the given performance
 * structure.
 *
 * In every segment of @a perf, each performance factor is replaced by the
 * composition of @a func with that factor. A performance node describing
 * @a func is created for each segment and attached as a child of the
 * segment's node.
 *
 * @param [inout] perf      Performance data structure to update in place.
 * @param [in]    func      Function to apply to the performance factors of the
 *                          @a perf performance structure.
 * @param [in]    name      Name for the performance node that would be created
 *                          to represent the impact of @a func.
 * @param [in]    desc_fmt  Formatted description for the performance node that
 *                          would be created to represent the impact of @a func.
 * @param [in]    ...       Arguments for the @a desc_fmt format string.
 */
void ucp_proto_perf_apply_func(ucp_proto_perf_t *perf, ucs_linear_func_t func,
const char *name, const char *desc_fmt, ...);


/**
* Expand given perf by estimation that all messages on interval
* [end of @a frag_seg + 1, @a max_length] would be sent in a pipeline async
Expand Down
7 changes: 7 additions & 0 deletions src/ucp/rndv/proto_rndv.c
Original file line number Diff line number Diff line change
Expand Up @@ -403,6 +403,13 @@ static void ucp_proto_rndv_ctrl_variant_probe(
cfg_thresh = remote_proto->cfg_thresh;
}

if (fabs(params->perf_bias) > UCP_PROTO_PERF_EPSILON) {
ucp_proto_perf_apply_func(perf,
ucs_linear_func_make(0.0,
1.0 - params->perf_bias),
"bias", "%.2f %%", params->perf_bias);
}

ucp_proto_select_add_proto(&params->super.super, cfg_thresh, cfg_priority,
perf, rpriv, priv_size);

Expand Down
29 changes: 23 additions & 6 deletions test/gtest/ucp/test_ucp_proto_mock.cc
Original file line number Diff line number Diff line change
Expand Up @@ -366,8 +366,8 @@ class test_ucp_proto_mock_rcx : public test_ucp_proto_mock {
}
};

UCS_TEST_P(test_ucp_proto_mock_rcx, rndv_1_lane,
"IB_NUM_PATHS?=1", "MAX_RNDV_LANES=1")
UCS_TEST_P(test_ucp_proto_mock_rcx, rndv_1_lane, "IB_NUM_PATHS?=1",
"MAX_RNDV_LANES=1")
{
ucp_proto_select_key_t key = any_key();
key.param.op_id_flags = UCP_OP_ID_AM_SEND;
Expand All @@ -384,8 +384,25 @@ UCS_TEST_P(test_ucp_proto_mock_rcx, rndv_1_lane,
}, key);
}

UCS_TEST_P(test_ucp_proto_mock_rcx, rndv_2_lanes,
"IB_NUM_PATHS?=2", "MAX_RNDV_LANES=2")
/*
 * Check the sender's protocol selection for AM send with a single path and a
 * single rendezvous lane when RNDV_PERF_DIFF is set to 0.
 */
UCS_TEST_P(test_ucp_proto_mock_rcx, zero_rndv_perf_diff, "IB_NUM_PATHS?=1",
"MAX_RNDV_LANES=1", "RNDV_PERF_DIFF=0")
{
ucp_proto_select_key_t key = any_key();
key.param.op_id_flags = UCP_OP_ID_AM_SEND;
key.param.op_attr = 0;

/* Expected selection table; entries are presumably
 * {first length, last length, protocol description, lane configuration}
 * - TODO confirm against check_ep_config() */
check_ep_config(sender(), {
{0, 200, "short", "rc_mlx5/mock_1:1"},
{201, 6650, "copy-in", "rc_mlx5/mock_1:1"},
{6651, 8246, "zero-copy", "rc_mlx5/mock_1:1"},
{8247, 22502, "multi-frag zero-copy", "rc_mlx5/mock_1:1"},
{22503, INF, "rendezvous zero-copy read from remote",
"rc_mlx5/mock_0:1"},
}, key);
}

UCS_TEST_P(test_ucp_proto_mock_rcx, rndv_2_lanes, "IB_NUM_PATHS?=2",
"MAX_RNDV_LANES=2")
{
ucp_proto_select_key_t key = any_key();
key.param.op_id_flags = UCP_OP_ID_AM_SEND;
Expand All @@ -396,8 +413,8 @@ UCS_TEST_P(test_ucp_proto_mock_rcx, rndv_2_lanes,
{0, 200, "short", "rc_mlx5/mock_1:1/path0"},
{201, 6650, "copy-in", "rc_mlx5/mock_1:1/path0"},
{6651, 8246, "zero-copy", "rc_mlx5/mock_1:1/path0"},
{8247, 20300, "multi-frag zero-copy", "rc_mlx5/mock_1:1/path0"},
{20301, INF, "rendezvous zero-copy read from remote",
{8247, 19883, "multi-frag zero-copy", "rc_mlx5/mock_1:1/path0"},
{19884, INF, "rendezvous zero-copy read from remote",
"47% on rc_mlx5/mock_1:1/path0 and 53% on rc_mlx5/mock_0:1/path0"},
}, key);
}
Expand Down

0 comments on commit 5653bb2

Please sign in to comment.