@@ -491,7 +491,7 @@ ompi_mtl_ofi_define_tag_mode(int ofi_tag_mode, int *bits_for_cid) {
491
491
} \
492
492
} while(0);
493
493
494
- static int ompi_mtl_ofi_init_sep (struct fi_info * prov )
494
+ static int ompi_mtl_ofi_init_sep (struct fi_info * prov , int universe_size )
495
495
{
496
496
int ret = OMPI_SUCCESS , num_ofi_ctxts ;
497
497
struct fi_av_attr av_attr = {0 };
@@ -513,7 +513,7 @@ static int ompi_mtl_ofi_init_sep(struct fi_info *prov)
513
513
514
514
av_attr .type = (MTL_OFI_AV_TABLE == av_type ) ? FI_AV_TABLE : FI_AV_MAP ;
515
515
av_attr .rx_ctx_bits = ompi_mtl_ofi .rx_ctx_bits ;
516
- av_attr .count = ompi_mtl_ofi .num_ofi_contexts ;
516
+ av_attr .count = ompi_mtl_ofi .num_ofi_contexts * universe_size ;
517
517
ret = fi_av_open (ompi_mtl_ofi .domain , & av_attr , & ompi_mtl_ofi .av , NULL );
518
518
519
519
if (0 != ret ) {
@@ -546,7 +546,7 @@ static int ompi_mtl_ofi_init_sep(struct fi_info *prov)
546
546
return ret ;
547
547
}
548
548
549
- static int ompi_mtl_ofi_init_regular_ep (struct fi_info * prov )
549
+ static int ompi_mtl_ofi_init_regular_ep (struct fi_info * prov , int universe_size )
550
550
{
551
551
int ret = OMPI_SUCCESS ;
552
552
struct fi_av_attr av_attr = {0 };
@@ -574,6 +574,7 @@ static int ompi_mtl_ofi_init_regular_ep(struct fi_info * prov)
574
574
* - address vector and completion queues
575
575
*/
576
576
av_attr .type = (MTL_OFI_AV_TABLE == av_type ) ? FI_AV_TABLE : FI_AV_MAP ;
577
+ av_attr .count = universe_size ;
577
578
ret = fi_av_open (ompi_mtl_ofi .domain , & av_attr , & ompi_mtl_ofi .av , NULL );
578
579
if (ret ) {
579
580
MTL_OFI_LOG_FI_ERR (ret , "fi_av_open failed" );
@@ -626,6 +627,8 @@ ompi_mtl_ofi_component_init(bool enable_progress_threads,
626
627
struct fi_info * prov_cq_data = NULL ;
627
628
char ep_name [FI_NAME_MAX ] = {0 };
628
629
size_t namelen ;
630
+ int universe_size ;
631
+ char * univ_size_str ;
629
632
630
633
/**
631
634
* Hints to filter providers
@@ -897,21 +900,35 @@ ompi_mtl_ofi_component_init(bool enable_progress_threads,
897
900
* vectors, completion counters or event queues etc, and enabled.
898
901
* See man fi_endpoint for more details.
899
902
*/
900
- max_ofi_ctxts = (prov -> domain_attr -> max_ep_tx_ctx <
901
- prov -> domain_attr -> max_ep_rx_ctx ) ?
902
- prov -> domain_attr -> max_ep_tx_ctx :
903
- prov -> domain_attr -> max_ep_rx_ctx ;
904
-
905
- num_local_ranks = 1 + ompi_process_info .num_local_peers ;
906
- if ((max_ofi_ctxts <= num_local_ranks ) &&
907
- (1 == ompi_mtl_ofi .enable_sep )) {
908
- opal_show_help ("help-mtl-ofi.txt" , "Local ranks exceed ofi contexts" ,
909
- true, prov -> fabric_attr -> prov_name ,
910
- ompi_process_info .nodename , __FILE__ , __LINE__ );
911
- goto error ;
903
+
904
+ /* use the universe size as a rough guess on the address vector
905
+ * size hint that should be passed to fi_av_open(). For regular
906
+ * endpoints, the count will be the universe size. For scalable
907
+ * endpoints, the count will be the universe size multiplied by
908
+ * the number of contexts. In either case, if the universe grows
909
+ * (via dynamic processes), the count is a hint, not a hard limit,
910
+ * so libfabric will just be slightly less efficient.
911
+ */
912
+ univ_size_str = getenv ("OMPI_UNIVERSE_SIZE" );
913
+ if (NULL == univ_size_str ||
914
+ (universe_size = strtol (univ_size_str , NULL , 0 )) <= 0 ) {
915
+ universe_size = ompi_proc_world_size ();
912
916
}
913
917
914
918
if (1 == ompi_mtl_ofi .enable_sep ) {
919
+ max_ofi_ctxts = (prov -> domain_attr -> max_ep_tx_ctx <
920
+ prov -> domain_attr -> max_ep_rx_ctx ) ?
921
+ prov -> domain_attr -> max_ep_tx_ctx :
922
+ prov -> domain_attr -> max_ep_rx_ctx ;
923
+
924
+ num_local_ranks = 1 + ompi_process_info .num_local_peers ;
925
+ if (max_ofi_ctxts <= num_local_ranks ) {
926
+ opal_show_help ("help-mtl-ofi.txt" , "Local ranks exceed ofi contexts" ,
927
+ true, prov -> fabric_attr -> prov_name ,
928
+ ompi_process_info .nodename , __FILE__ , __LINE__ );
929
+ goto error ;
930
+ }
931
+
915
932
/* Provision enough contexts to service all ranks in a node */
916
933
max_ofi_ctxts /= num_local_ranks ;
917
934
@@ -926,9 +943,9 @@ ompi_mtl_ofi_component_init(bool enable_progress_threads,
926
943
ompi_mtl_ofi .num_ofi_contexts = max_ofi_ctxts ;
927
944
}
928
945
929
- ret = ompi_mtl_ofi_init_sep (prov );
946
+ ret = ompi_mtl_ofi_init_sep (prov , universe_size );
930
947
} else {
931
- ret = ompi_mtl_ofi_init_regular_ep (prov );
948
+ ret = ompi_mtl_ofi_init_regular_ep (prov , universe_size );
932
949
}
933
950
934
951
if (OMPI_SUCCESS != ret ) {
0 commit comments