Skip to content

Commit

Permalink
Merge remote-tracking branch 'upstream/master' into topic/proto-consi…
Browse files Browse the repository at this point in the history
…der-rndv-perf-diff
  • Loading branch information
rakhmets committed Jan 15, 2025
2 parents 2b89e12 + 3a9f02a commit aaa08bd
Show file tree
Hide file tree
Showing 56 changed files with 1,228 additions and 364 deletions.
4 changes: 2 additions & 2 deletions buildlib/jucx/jucx-build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -53,8 +53,8 @@ jobs:
set -eE
{
echo -e "<settings><servers><server>"
echo -e "<id>ossrh</id><username>\${env.SONATYPE_USERNAME}</username>"
echo -e "<password>\${env.SONATYPE_PASSWORD}</password>"
echo -e "<id>ossrh</id><username>$(SONATYPE_USERNAME)</username>"
echo -e "<password>$(SONATYPE_PASSWORD)</password>"
echo -e "</server></servers></settings>"
} > $(temp_cfg)
displayName: Generate temporary config
Expand Down
9 changes: 6 additions & 3 deletions configure.ac
Original file line number Diff line number Diff line change
Expand Up @@ -185,6 +185,7 @@ AS_IF([test "x$with_docs_only" = xyes],
AM_CONDITIONAL([HAVE_STATS], [false])
AM_CONDITIONAL([HAVE_TUNING], [false])
AM_CONDITIONAL([HAVE_IB], [false])
AM_CONDITIONAL([HAVE_EFA], [false])
AM_CONDITIONAL([HAVE_MLX5_HW_UD], [false])
AM_CONDITIONAL([HAVE_MLX5_DV], [false])
AM_CONDITIONAL([HAVE_MLX5_MMO], [false])
Expand Down Expand Up @@ -406,9 +407,11 @@ AC_MSG_NOTICE([Building documents only])
AC_MSG_NOTICE([UCX build configuration:])
AC_MSG_NOTICE([ Build prefix: ${prefix}])
AC_MSG_NOTICE([ Configuration dir: ${ucx_config_dir}])
AC_MSG_NOTICE([ Preprocessor flags: ${BASE_CPPFLAGS}])
AC_MSG_NOTICE([ C compiler: ${CC} ${BASE_CFLAGS}])
AC_MSG_NOTICE([ C++ compiler: ${CXX} ${BASE_CXXFLAGS}])
AC_MSG_NOTICE([ CC: ${CC}])
AC_MSG_NOTICE([ CXX: ${CXX}])
AC_MSG_NOTICE([ CPPFLAGS: ${BASE_CPPFLAGS} ${CPPFLAGS}])
AC_MSG_NOTICE([ CFLAGS: ${BASE_CFLAGS} ${CFLAGS}])
AC_MSG_NOTICE([ CXXFLAGS: ${BASE_CXXFLAGS} ${CXXFLAGS}])
AC_MSG_NOTICE([ ASAN check: ${enable_asan}])
AC_MSG_NOTICE([ Multi-thread: ${mt_enable}])
AC_MSG_NOTICE([ MPI tests: ${mpi_enable}])
Expand Down
1 change: 1 addition & 0 deletions contrib/buildrpm.sh
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,7 @@ if [ $opt_binrpm -eq 1 ]; then
with_args+=" $(with_arg fuse)"
with_args+=" $(with_arg mad)"
with_args+=" $(with_arg mlx5)"
with_args+=" $(with_arg efa)"

echo rpmbuild -bb $rpmmacros $rpmopts $rpmspec $defines $with_args | bash -eEx
fi
4 changes: 2 additions & 2 deletions src/ucp/am/eager_single.c
Original file line number Diff line number Diff line change
Expand Up @@ -100,10 +100,10 @@ ucp_am_eager_short_probe_common(const ucp_proto_init_params_t *init_params,
.super.cfg_priority = 0,
.super.min_length = 0,
.super.max_length = SIZE_MAX,
.super.min_iov = 0,
.super.min_iov = 3,
.super.min_frag_offs = UCP_PROTO_COMMON_OFFSET_INVALID,
.super.max_frag_offs = ucs_offsetof(uct_iface_attr_t, cap.am.max_short),
.super.max_iov_offs = UCP_PROTO_COMMON_OFFSET_INVALID,
.super.max_iov_offs = ucs_offsetof(uct_iface_attr_t, cap.am.max_iov),
.super.hdr_size = ucp_am_eager_single_hdr_size(op_id),
.super.send_op = UCT_EP_OP_AM_SHORT,
.super.memtype_op = UCT_EP_OP_LAST,
Expand Down
2 changes: 1 addition & 1 deletion src/ucp/core/ucp_am.c
Original file line number Diff line number Diff line change
Expand Up @@ -479,7 +479,7 @@ ucp_am_send_short(ucp_ep_h ep, uint16_t id, uint16_t flags, const void *header,
int is_reply)
{
size_t iov_cnt = 0ul;
uct_iov_t iov[4];
uct_iov_t iov[UCP_AM_SEND_SHORT_MIN_IOV];
uint8_t am_id;
ucp_am_hdr_t am_hdr;
ucp_am_reply_ftr_t ftr;
Expand Down
5 changes: 5 additions & 0 deletions src/ucp/core/ucp_am.h
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,11 @@
((ucp_am_hdr_t*)&(_rts)->hdr); \
})

/*
* Apart from protov1/v2, UCP can try to send specified number of iovs in one
* uct_ep_am_short_iov() call.
*
* This value is used as the size of the local iov array in
* ucp_am_send_short(), and the AM eager-short thresholds are configured only
* when the transport reports cap.am.max_iov greater than or equal to it.
*
* NOTE(review): "apart from protov1/v2" presumably means this applies to the
* AM short send path that does not go through protocol selection - confirm.
*/
#define UCP_AM_SEND_SHORT_MIN_IOV 4

enum {
UCP_AM_CB_PRIV_FIRST_FLAG = UCS_BIT(15),
Expand Down
116 changes: 67 additions & 49 deletions src/ucp/core/ucp_context.c
Original file line number Diff line number Diff line change
Expand Up @@ -650,6 +650,10 @@ static ucs_config_field_t ucp_config_table[] = {
ucs_offsetof(ucp_config_t, ctx),
UCS_CONFIG_TYPE_TABLE(ucp_context_config_table)},

{"MAX_COMPONENT_MDS", "16",
"Maximum number of memory domains per component to use.",
ucs_offsetof(ucp_config_t, max_component_mds), UCS_CONFIG_TYPE_ULUNITS},

{NULL}
};
UCS_CONFIG_DECLARE_TABLE(ucp_config_table, "UCP context", NULL, ucp_config_t)
Expand Down Expand Up @@ -1561,6 +1565,7 @@ ucp_add_component_resources(ucp_context_h context, ucp_rsc_index_t cmpt_index,
const ucs_string_set_t *aux_tls)
{
const ucp_tl_cmpt_t *tl_cmpt = &context->tl_cmpts[cmpt_index];
size_t avail_mds = config->max_component_mds;
uct_component_attr_t uct_component_attr;
unsigned num_tl_resources;
ucs_status_t status;
Expand All @@ -1572,7 +1577,8 @@ ucp_add_component_resources(ucp_context_h context, ucp_rsc_index_t cmpt_index,
const uct_md_attr_v2_t *md_attr;

/* List memory domain resources */
uct_component_attr.field_mask = UCT_COMPONENT_ATTR_FIELD_MD_RESOURCES;
uct_component_attr.field_mask = UCT_COMPONENT_ATTR_FIELD_MD_RESOURCES |
UCT_COMPONENT_ATTR_FIELD_NAME;
uct_component_attr.md_resources =
ucs_alloca(tl_cmpt->attr.md_resource_count *
sizeof(*uct_component_attr.md_resources));
Expand All @@ -1584,6 +1590,14 @@ ucp_add_component_resources(ucp_context_h context, ucp_rsc_index_t cmpt_index,
/* Open all memory domains */
mem_type_mask = UCS_BIT(UCS_MEMORY_TYPE_HOST);
for (i = 0; i < tl_cmpt->attr.md_resource_count; ++i) {
if (avail_mds == 0) {
ucs_debug("only first %zu domains kept for component %s with %u "
"memory domains resources",
config->max_component_mds, uct_component_attr.name,
tl_cmpt->attr.md_resource_count);
break;
}

md_index = context->num_mds;
md_attr = &context->tl_mds[md_index].attr;

Expand All @@ -1603,67 +1617,71 @@ ucp_add_component_resources(ucp_context_h context, ucp_rsc_index_t cmpt_index,
goto out;
}

if (num_tl_resources > 0) {
/* List of memory type MDs */
mem_type_bitmap = md_attr->detect_mem_types;
if (~mem_type_mask & mem_type_bitmap) {
context->mem_type_detect_mds[context->num_mem_type_detect_mds] = md_index;
++context->num_mem_type_detect_mds;
mem_type_mask |= mem_type_bitmap;
}
if (num_tl_resources == 0) {
/* If the MD does not have transport resources (device or sockaddr),
* don't use it */
ucs_debug("closing md %s because it has no selected transport resources",
context->tl_mds[md_index].rsc.md_name);
uct_md_close(context->tl_mds[md_index].md);
continue;
}

ucs_memory_type_for_each(mem_type) {
if (md_attr->flags & UCT_MD_FLAG_REG) {
if ((context->config.ext.reg_nb_mem_types & UCS_BIT(mem_type)) &&
!(md_attr->reg_nonblock_mem_types & UCS_BIT(mem_type))) {
if (md_attr->reg_mem_types & UCS_BIT(mem_type)) {
/* Keep map of MDs supporting blocking registration
* if non-blocking registration is requested for the
* given memory type. In some cases blocking
* registration may be required anyway (e.g. internal
* staging buffers for rndv pipeline protocols). */
context->reg_block_md_map[mem_type] |= UCS_BIT(md_index);
}
continue;
}
avail_mds--;

/* List of memory type MDs */
mem_type_bitmap = md_attr->detect_mem_types;
if (~mem_type_mask & mem_type_bitmap) {
context->mem_type_detect_mds[context->num_mem_type_detect_mds] = md_index;
++context->num_mem_type_detect_mds;
mem_type_mask |= mem_type_bitmap;
}

ucs_memory_type_for_each(mem_type) {
if (md_attr->flags & UCT_MD_FLAG_REG) {
if ((context->config.ext.reg_nb_mem_types & UCS_BIT(mem_type)) &&
!(md_attr->reg_nonblock_mem_types & UCS_BIT(mem_type))) {
if (md_attr->reg_mem_types & UCS_BIT(mem_type)) {
context->reg_md_map[mem_type] |= UCS_BIT(md_index);
/* Keep map of MDs supporting blocking registration
* if non-blocking registration is requested for the
* given memory type. In some cases blocking
* registration may be required anyway (e.g. internal
* staging buffers for rndv pipeline protocols). */
context->reg_block_md_map[mem_type] |= UCS_BIT(md_index);
}
continue;
}

if (md_attr->cache_mem_types & UCS_BIT(mem_type)) {
context->cache_md_map[mem_type] |= UCS_BIT(md_index);
}
if (md_attr->reg_mem_types & UCS_BIT(mem_type)) {
context->reg_md_map[mem_type] |= UCS_BIT(md_index);
}

if ((context->config.ext.gva_enable != UCS_CONFIG_OFF) &&
(md_attr->gva_mem_types & UCS_BIT(mem_type))) {
context->gva_md_map[mem_type] |= UCS_BIT(md_index);
}
if (md_attr->cache_mem_types & UCS_BIT(mem_type)) {
context->cache_md_map[mem_type] |= UCS_BIT(md_index);
}
}

if (md_attr->flags & UCT_MD_FLAG_EXPORTED_MKEY) {
context->export_md_map |= UCS_BIT(md_index);
if ((context->config.ext.gva_enable != UCS_CONFIG_OFF) &&
(md_attr->gva_mem_types & UCS_BIT(mem_type))) {
context->gva_md_map[mem_type] |= UCS_BIT(md_index);
}
}
}

if (md_attr->flags & UCT_MD_FLAG_REG_DMABUF) {
context->dmabuf_reg_md_map |= UCS_BIT(md_index);
}
if (md_attr->flags & UCT_MD_FLAG_EXPORTED_MKEY) {
context->export_md_map |= UCS_BIT(md_index);
}

ucs_for_each_bit(mem_type, md_attr->dmabuf_mem_types) {
/* In case of multiple providers, take the first one */
if (context->dmabuf_mds[mem_type] == UCP_NULL_RESOURCE) {
context->dmabuf_mds[mem_type] = md_index;
}
if (md_attr->flags & UCT_MD_FLAG_REG_DMABUF) {
context->dmabuf_reg_md_map |= UCS_BIT(md_index);
}

ucs_for_each_bit(mem_type, md_attr->dmabuf_mem_types) {
/* In case of multiple providers, take the first one */
if (context->dmabuf_mds[mem_type] == UCP_NULL_RESOURCE) {
context->dmabuf_mds[mem_type] = md_index;
}
++context->num_mds;
} else {
/* If the MD does not have transport resources (device or sockaddr),
* don't use it */
ucs_debug("closing md %s because it has no selected transport resources",
context->tl_mds[md_index].rsc.md_name);
uct_md_close(context->tl_mds[md_index].md);
}

++context->num_mds;
}

context->mem_type_mask |= mem_type_mask;
Expand Down
2 changes: 2 additions & 0 deletions src/ucp/core/ucp_context.h
Original file line number Diff line number Diff line change
Expand Up @@ -240,6 +240,8 @@ struct ucp_config {
ucs_list_link_t cached_key_list;
/** This config environment prefix */
char *env_prefix;
/** Maximum number of memory domains to use per component */
size_t max_component_mds;
};


Expand Down
17 changes: 10 additions & 7 deletions src/ucp/core/ucp_ep.c
Original file line number Diff line number Diff line change
Expand Up @@ -2872,9 +2872,11 @@ ucs_status_t ucp_ep_config_init(ucp_worker_h worker, ucp_ep_config_t *config,
iface_attr->cap.am.max_short, sizeof(ucp_am_hdr_t),
config->am.zcopy_thresh[0], &config->rndv.am_thresh);

ucp_ep_config_set_memtype_thresh(&config->am_u.max_eager_short,
am_max_eager_short,
context->num_mem_type_detect_mds);
if (iface_attr->cap.am.max_iov >= UCP_AM_SEND_SHORT_MIN_IOV) {
ucp_ep_config_set_memtype_thresh(
&config->am_u.max_eager_short, am_max_eager_short,
context->num_mem_type_detect_mds);
}

/* All keys must fit in RNDV packet.
* TODO remove some MDs if they don't
Expand Down Expand Up @@ -2907,9 +2909,11 @@ ucs_status_t ucp_ep_config_init(ucp_worker_h worker, ucp_ep_config_t *config,
sizeof(ucp_am_hdr_t) + sizeof(ucp_am_reply_ftr_t),
config->am.zcopy_thresh[0], &config->rndv.am_thresh);

ucp_ep_config_set_memtype_thresh(&config->am_u.max_reply_eager_short,
am_max_eager_short,
context->num_mem_type_detect_mds);
if (iface_attr->cap.am.max_iov >= UCP_AM_SEND_SHORT_MIN_IOV) {
ucp_ep_config_set_memtype_thresh(
&config->am_u.max_reply_eager_short, am_max_eager_short,
context->num_mem_type_detect_mds);
}
} else {
/* Stub endpoint */
config->am.max_bcopy = UCP_MIN_BCOPY;
Expand Down Expand Up @@ -3889,7 +3893,6 @@ static void ucp_ep_config_proto_init(ucp_worker_h worker,
{
ucp_ep_config_t *ep_config = ucp_worker_ep_config(worker, cfg_index);
ucp_ep_config_key_t *key = &ep_config->key;

ucp_memtype_thresh_t *tag_max_short;
ucp_lane_index_t tag_exp_lane;
unsigned tag_proto_flags;
Expand Down
1 change: 0 additions & 1 deletion src/uct/base/uct_iface.h
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,6 @@
* is extended by a system namespace information */
#define UCT_IFACE_LOCAL_ADDR_FLAG_NS UCS_BIT(63)


enum {
UCT_EP_STAT_AM,
UCT_EP_STAT_PUT,
Expand Down
2 changes: 1 addition & 1 deletion src/uct/ib/Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

if HAVE_IB

SUBDIRS = . mlx5 rdmacm
SUBDIRS = . mlx5 efa rdmacm

module_LTLIBRARIES = libuct_ib.la
libuct_ib_la_CPPFLAGS = $(BASE_CPPFLAGS) $(IBVERBS_CPPFLAGS)
Expand Down
5 changes: 5 additions & 0 deletions src/uct/ib/base/ib_device.c
Original file line number Diff line number Diff line change
Expand Up @@ -569,6 +569,11 @@ ucs_status_t uct_ib_device_init(uct_ib_device_t *dev,

dev->async_events = async_events;

if (!dev->req_notify_cq_support) {
ucs_trace("%s does not support async event handling",
uct_ib_device_name(dev));
}

uct_ib_device_get_locality(ibv_get_device_name(ibv_device),
&dev->local_cpus);

Expand Down
7 changes: 7 additions & 0 deletions src/uct/ib/base/ib_device.h
Original file line number Diff line number Diff line change
Expand Up @@ -238,6 +238,13 @@ typedef struct uct_ib_device {
uint8_t pci_cswap_arg_sizes;
uint8_t atomic_align;
uint8_t lag_level;
uint8_t req_notify_cq_support; /* Also indicates
IBV_SEND_SOLICITED
support */
uint8_t ordered_send_comp;
uint64_t mr_access_flags;
uint32_t max_inline_data;

/* AH hash */
khash_t(uct_ib_ah) ah_hash;
ucs_recursive_spinlock_t ah_lock;
Expand Down
Loading

0 comments on commit aaa08bd

Please sign in to comment.