@@ -387,10 +387,10 @@ ompi_mtl_ofi_check_fi_remote_cq_data(int fi_version,
387
387
}
388
388
389
389
static void
390
- ompi_mtl_ofi_define_tag_mode (int ofi_tag_mode ) {
390
+ ompi_mtl_ofi_define_tag_mode (int ofi_tag_mode , int * bits_for_cid ) {
391
391
switch (ofi_tag_mode ) {
392
392
case MTL_OFI_TAG_1 :
393
- ompi_mtl_ofi . base . mtl_max_contextid = (int )(( 1ULL << MTL_OFI_CID_BIT_COUNT_1 ) - 1 ) ;
393
+ * bits_for_cid = (int ) MTL_OFI_CID_BIT_COUNT_1 ;
394
394
ompi_mtl_ofi .base .mtl_max_tag = (int )((1ULL << (MTL_OFI_TAG_BIT_COUNT_1 - 1 )) - 1 );
395
395
396
396
ompi_mtl_ofi .source_rank_tag_mask = MTL_OFI_SOURCE_TAG_MASK_1 ;
@@ -405,7 +405,7 @@ ompi_mtl_ofi_define_tag_mode(int ofi_tag_mode) {
405
405
ompi_mtl_ofi .sync_proto_mask = MTL_OFI_PROTO_MASK_1 ;
406
406
break ;
407
407
case MTL_OFI_TAG_2 :
408
- ompi_mtl_ofi . base . mtl_max_contextid = (int )(( 1ULL << MTL_OFI_CID_BIT_COUNT_2 ) - 1 ) ;
408
+ * bits_for_cid = (int ) MTL_OFI_CID_BIT_COUNT_2 ;
409
409
ompi_mtl_ofi .base .mtl_max_tag = (int )((1ULL << (MTL_OFI_TAG_BIT_COUNT_2 - 1 )) - 1 );
410
410
411
411
ompi_mtl_ofi .source_rank_tag_mask = MTL_OFI_SOURCE_TAG_MASK_2 ;
@@ -420,7 +420,7 @@ ompi_mtl_ofi_define_tag_mode(int ofi_tag_mode) {
420
420
ompi_mtl_ofi .sync_proto_mask = MTL_OFI_PROTO_MASK_2 ;
421
421
break ;
422
422
default : /* use FI_REMOTE_CQ_DATA */
423
- ompi_mtl_ofi . base . mtl_max_contextid = (int )(( 1ULL << MTL_OFI_CID_BIT_COUNT_DATA ) - 1 ) ;
423
+ * bits_for_cid = (int ) MTL_OFI_CID_BIT_COUNT_DATA ;
424
424
ompi_mtl_ofi .base .mtl_max_tag = (int )((1ULL << (MTL_OFI_TAG_BIT_COUNT_DATA - 1 )) - 1 );
425
425
426
426
ompi_mtl_ofi .mpi_tag_mask = MTL_OFI_TAG_MASK_DATA ;
@@ -444,6 +444,8 @@ ompi_mtl_ofi_component_init(bool enable_progress_threads,
444
444
struct fi_av_attr av_attr = {0 };
445
445
char ep_name [FI_NAME_MAX ] = {0 };
446
446
size_t namelen ;
447
+ int ofi_tag_leading_zeros ;
448
+ int ofi_tag_bits_for_cid ;
447
449
448
450
/**
449
451
* Hints to filter providers
@@ -558,7 +560,7 @@ ompi_mtl_ofi_component_init(bool enable_progress_threads,
558
560
ompi_mtl_ofi .fi_cq_data = false;
559
561
if (MTL_OFI_TAG_AUTO == ofi_tag_mode ) {
560
562
/* Fallback to MTL_OFI_TAG_1 */
561
- ompi_mtl_ofi_define_tag_mode (MTL_OFI_TAG_1 );
563
+ ompi_mtl_ofi_define_tag_mode (MTL_OFI_TAG_1 , & ofi_tag_bits_for_cid );
562
564
} else { /* MTL_OFI_TAG_FULL */
563
565
opal_output_verbose (1 , ompi_mtl_base_framework .framework_output ,
564
566
"%s:%d: OFI provider %s does not support FI_REMOTE_CQ_DATA\n" ,
@@ -569,13 +571,37 @@ ompi_mtl_ofi_component_init(bool enable_progress_threads,
569
571
/* Use FI_REMOTE_CQ_DATA */
570
572
ompi_mtl_ofi .fi_cq_data = true;
571
573
prov = prov_cq_data ;
572
- ompi_mtl_ofi_define_tag_mode (MTL_OFI_TAG_FULL );
574
+ ompi_mtl_ofi_define_tag_mode (MTL_OFI_TAG_FULL , & ofi_tag_bits_for_cid );
573
575
}
574
576
} else { /* MTL_OFI_TAG_1 or MTL_OFI_TAG_2 */
575
577
ompi_mtl_ofi .fi_cq_data = false;
576
- ompi_mtl_ofi_define_tag_mode (ofi_tag_mode );
578
+ ompi_mtl_ofi_define_tag_mode (ofi_tag_mode , & ofi_tag_bits_for_cid );
577
579
}
578
580
581
+ /**
582
+ * Check for potential bits in the OFI tag that providers may be reserving
583
+ * for internal usage (see mem_tag_format in fi_endpoint man page).
584
+ */
585
+
586
+ ofi_tag_leading_zeros = 0 ;
587
+ while (!((prov -> ep_attr -> mem_tag_format << ofi_tag_leading_zeros ++ ) &
588
+ (uint64_t ) MTL_OFI_HIGHEST_TAG_BIT ) &&
589
+ /* Do not keep looping if the provider does not support enough bits */
590
+ (ofi_tag_bits_for_cid >= MTL_OFI_MINIMUM_CID_BITS )){
591
+ ofi_tag_bits_for_cid -- ;
592
+ }
593
+
594
+ if (ofi_tag_bits_for_cid < MTL_OFI_MINIMUM_CID_BITS ) {
595
+ opal_show_help ("help-mtl-ofi.txt" , "Not enough bits for CID" , true,
596
+ prov -> fabric_attr -> prov_name ,
597
+ prov -> fabric_attr -> prov_name ,
598
+ ompi_process_info .nodename , __FILE__ , __LINE__ );
599
+ goto error ;
600
+ }
601
+
602
+ /* Update the maximum supported Communicator ID */
603
+ ompi_mtl_ofi .base .mtl_max_contextid = (int )((1ULL << ofi_tag_bits_for_cid ) - 1 );
604
+
579
605
/**
580
606
* Open fabric
581
607
* The getinfo struct returns a fabric attribute struct that can be used to
0 commit comments