Skip to content

Commit 15a5eca

Browse files
committed
btl/ofi: Add cache bypass mechanism
Libfabric has its own registration cache, which can cause conflicts, particularly with CUDA buffers due to their delayed invalidation strategy involving buffer IDs. Added a cache bypass flag for providers known to use a registration cache. Signed-off-by: William Zhang <wilzhang@amazon.com>
1 parent c74d333 commit 15a5eca

File tree

3 files changed

+16
-1
lines changed

3 files changed

+16
-1
lines changed

opal/mca/btl/ofi/btl_ofi.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -140,6 +140,9 @@ struct mca_btl_ofi_module_t {
140140

141141
/** registration cache */
142142
mca_rcache_base_module_t *rcache;
143+
/* If the underlying OFI provider has its own cache, we want to bypass
144+
* rcache registration */
145+
bool bypass_cache;
143146
};
144147
typedef struct mca_btl_ofi_module_t mca_btl_ofi_module_t;
145148

opal/mca/btl/ofi/btl_ofi_component.c

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -575,6 +575,7 @@ static int mca_btl_ofi_init_device(struct fi_info *info)
575575
module->outstanding_rdma = 0;
576576
module->use_virt_addr = false;
577577
module->use_fi_mr_bind = false;
578+
module->bypass_cache = false;
578579

579580
if (ofi_info->domain_attr->mr_mode == FI_MR_BASIC
580581
|| ofi_info->domain_attr->mr_mode & FI_MR_VIRT_ADDR) {
@@ -585,6 +586,13 @@ static int mca_btl_ofi_init_device(struct fi_info *info)
585586
module->use_fi_mr_bind = true;
586587
}
587588

589+
/* Currently there is no API to query whether the libfabric provider
590+
* uses an underlying registration cache. For now, just check for known
591+
* providers that use registration caching. */
592+
if (!strncasecmp(info->fabric_attr->prov_name, "efa", 3)) {
593+
module->bypass_cache = true;
594+
}
595+
588596
/* create endpoint list */
589597
OBJ_CONSTRUCT(&module->endpoints, opal_list_t);
590598
OBJ_CONSTRUCT(&module->module_lock, opal_mutex_t);

opal/mca/btl/ofi/btl_ofi_module.c

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -194,8 +194,12 @@ mca_btl_ofi_register_mem(struct mca_btl_base_module_t *btl,
194194
mca_btl_ofi_reg_t *reg;
195195
int access_flags = flags & MCA_BTL_REG_FLAG_ACCESS_ANY;
196196
int rc;
197+
uint32_t cache_flags = 0;
198+
if (ofi_module->bypass_cache) {
199+
cache_flags |= MCA_RCACHE_FLAGS_CACHE_BYPASS;
200+
}
197201

198-
rc = ofi_module->rcache->rcache_register(ofi_module->rcache, base, size, 0, access_flags,
202+
rc = ofi_module->rcache->rcache_register(ofi_module->rcache, base, size, cache_flags, access_flags,
199203
(mca_rcache_base_registration_t **) &reg);
200204
if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) {
201205
return NULL;

0 commit comments

Comments
 (0)