@@ -106,7 +106,7 @@ static int validate_info(struct fi_info *info, uint64_t required_caps, char **in
106
106
mr_mode = info -> domain_attr -> mr_mode ;
107
107
108
108
if (!(mr_mode == FI_MR_BASIC || mr_mode == FI_MR_SCALABLE
109
- || (mr_mode & ~(FI_MR_VIRT_ADDR | FI_MR_ALLOCATED | FI_MR_PROV_KEY | FI_MR_ENDPOINT )) == 0 )) {
109
+ || (mr_mode & ~(FI_MR_VIRT_ADDR | FI_MR_ALLOCATED | FI_MR_PROV_KEY | FI_MR_ENDPOINT | FI_MR_HMEM )) == 0 )) {
110
110
BTL_VERBOSE (("unsupported MR mode" ));
111
111
return OPAL_ERROR ;
112
112
}
@@ -256,8 +256,8 @@ static mca_btl_base_module_t **mca_btl_ofi_component_init(int *num_btl_modules,
256
256
libfabric_api = fi_version ();
257
257
258
258
/* bail if OFI version is less than 1.9. */
259
- if (libfabric_api < FI_VERSION (1 , 5 )) {
260
- BTL_VERBOSE (("ofi btl disqualified because OFI version < 1.5 ." ));
259
+ if (libfabric_api < FI_VERSION (1 , 9 )) {
260
+ BTL_VERBOSE (("ofi btl disqualified because OFI version < 1.9 ." ));
261
261
return NULL ;
262
262
}
263
263
@@ -339,16 +339,43 @@ static mca_btl_base_module_t **mca_btl_ofi_component_init(int *num_btl_modules,
339
339
340
340
mca_btl_ofi_component .module_count = 0 ;
341
341
342
- /* do the query. */
343
- rc = fi_getinfo (FI_VERSION (1 , 5 ), NULL , NULL , 0 , & hints , & info_list );
342
+ /* Request device transfer capabilities, separate from required_caps */
343
+ hints .caps |= FI_HMEM ;
344
+ hints .domain_attr -> mr_mode |= FI_MR_HMEM ;
345
+ no_hmem :
346
+
347
+ /* Do the query. The earliest version that supports FI_HMEM hints is 1.9 */
348
+ rc = fi_getinfo (FI_VERSION (1 , 9 ), NULL , NULL , 0 , & hints , & info_list );
344
349
if (0 != rc ) {
350
+ if (hints .caps & FI_HMEM ) {
351
+ /* Try again without FI_HMEM hints */
352
+ hints .caps &= ~FI_HMEM ;
353
+ hints .domain_attr -> mr_mode &= ~FI_MR_HMEM ;
354
+ goto no_hmem ;
355
+ }
345
356
BTL_VERBOSE (("fi_getinfo failed with code %d: %s" , rc , fi_strerror (- rc )));
346
357
if (NULL != include_list ) {
347
358
opal_argv_free (include_list );
348
359
}
349
360
return NULL ;
350
361
}
351
362
363
+ /* If we get to this point with FI_HMEM hint set, we want it to be a
364
+ * required capability
365
+ */
366
+ if (hints .caps & FI_HMEM ) {
367
+ /* The EFA provider has a bug where it incorrectly advertises FI_HMEM +
368
+ * FI_ATOMIC capability without being able to provide that support in
369
+ * versions before libfabric 1.18.0
370
+ */
371
+ if (libfabric_api < FI_VERSION (1 , 18 ) && !strncasecmp (info_list -> fabric_attr -> prov_name , "efa" , 3 )) {
372
+ hints .caps &= ~FI_HMEM ;
373
+ hints .domain_attr -> mr_mode &= ~FI_MR_HMEM ;
374
+ goto no_hmem ;
375
+ }
376
+ required_caps |= FI_HMEM ;
377
+ }
378
+
352
379
/* count the number of resources. */
353
380
info = info_list ;
354
381
while (info ) {
0 commit comments