Skip to content

Commit b7b9254

Browse files
authored
Merge pull request #8178 from rajachan/whack-remote-cq-data-query
mtl/ofi: Check cq_data_size without querying providers again
2 parents 30831fb + 6233dea commit b7b9254

File tree

1 file changed

+7
-50
lines changed

1 file changed

+7
-50
lines changed

ompi/mca/mtl/ofi/mtl_ofi_component.c

Lines changed: 7 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -367,43 +367,6 @@ select_ofi_provider(struct fi_info *providers,
367367
return prov;
368368
}
369369

370-
371-
/* Check if FI_REMOTE_CQ_DATA is supported, if so send the source rank there
372-
* FI_DIRECTED_RECV is also needed so receives can discriminate the source
373-
*/
374-
static int
375-
ompi_mtl_ofi_check_fi_remote_cq_data(int fi_version,
376-
struct fi_info *hints,
377-
struct fi_info *provider,
378-
struct fi_info **prov_cq_data)
379-
{
380-
int ret;
381-
char *provider_name;
382-
struct fi_info *hints_dup;
383-
hints_dup = fi_dupinfo(hints);
384-
385-
provider_name = strdup(provider->fabric_attr->prov_name);
386-
hints_dup->fabric_attr->prov_name = provider_name;
387-
hints_dup->caps |= FI_TAGGED | FI_DIRECTED_RECV;
388-
/* Ask for the size that OMPI uses for the source rank number */
389-
hints_dup->domain_attr->cq_data_size = sizeof(int);
390-
ret = fi_getinfo(fi_version, NULL, NULL, 0ULL, hints_dup, prov_cq_data);
391-
392-
if ((0 != ret) && (-FI_ENODATA != ret)) {
393-
opal_show_help("help-mtl-ofi.txt", "OFI call fail", true,
394-
"fi_getinfo",
395-
ompi_process_info.nodename, __FILE__, __LINE__,
396-
fi_strerror(-ret), -ret);
397-
return ret;
398-
} else if (-FI_ENODATA == ret) {
399-
/* The provider does not support FI_REMOTE_CQ_DATA */
400-
prov_cq_data = NULL;
401-
}
402-
403-
fi_freeinfo(hints_dup);
404-
return OMPI_SUCCESS;
405-
}
406-
407370
static void
408371
ompi_mtl_ofi_define_tag_mode(int ofi_tag_mode, int *bits_for_cid) {
409372
switch (ofi_tag_mode) {
@@ -649,7 +612,7 @@ ompi_mtl_ofi_component_init(bool enable_progress_threads,
649612
interface and local communication and remote communication. */
650613
hints->mode = FI_CONTEXT;
651614
hints->ep_attr->type = FI_EP_RDM;
652-
hints->caps = FI_TAGGED | FI_LOCAL_COMM | FI_REMOTE_COMM;
615+
hints->caps = FI_TAGGED | FI_LOCAL_COMM | FI_REMOTE_COMM | FI_DIRECTED_RECV;
653616
hints->tx_attr->msg_order = FI_ORDER_SAS;
654617
hints->rx_attr->msg_order = FI_ORDER_SAS;
655618
hints->rx_attr->op_flags = FI_COMPLETION;
@@ -800,14 +763,13 @@ ompi_mtl_ofi_component_init(bool enable_progress_threads,
800763
*/
801764
if ((MTL_OFI_TAG_AUTO == ofi_tag_mode) ||
802765
(MTL_OFI_TAG_FULL == ofi_tag_mode)) {
803-
ret = ompi_mtl_ofi_check_fi_remote_cq_data(fi_version,
804-
hints, prov,
805-
&prov_cq_data);
806-
if (OMPI_SUCCESS != ret) {
807-
goto error;
808-
} else if (NULL == prov_cq_data) {
766+
if (prov->domain_attr->cq_data_size >= sizeof(int) &&
767+
(prov->caps & FI_DIRECTED_RECV)) {
768+
/* Use FI_REMOTE_CQ_DATA */
769+
ompi_mtl_ofi.fi_cq_data = true;
770+
ompi_mtl_ofi_define_tag_mode(MTL_OFI_TAG_FULL, &ofi_tag_bits_for_cid);
771+
} else {
809772
/* No support for FI_REMOTE_CQ_DATA */
810-
fi_freeinfo(prov_cq_data);
811773
ompi_mtl_ofi.fi_cq_data = false;
812774
if (MTL_OFI_TAG_AUTO == ofi_tag_mode) {
813775
/* Fallback to MTL_OFI_TAG_1 */
@@ -818,11 +780,6 @@ ompi_mtl_ofi_component_init(bool enable_progress_threads,
818780
__FILE__, __LINE__, prov->fabric_attr->prov_name);
819781
goto error;
820782
}
821-
} else {
822-
/* Use FI_REMOTE_CQ_DATA */
823-
ompi_mtl_ofi.fi_cq_data = true;
824-
prov = prov_cq_data;
825-
ompi_mtl_ofi_define_tag_mode(MTL_OFI_TAG_FULL, &ofi_tag_bits_for_cid);
826783
}
827784
} else { /* MTL_OFI_TAG_1 or MTL_OFI_TAG_2 */
828785
ompi_mtl_ofi.fi_cq_data = false;

0 commit comments

Comments
 (0)