Skip to content

Commit ffab0a4

Browse files
committed
coll/han: call fallback function when HAN module is disabled
This patch addresses #11448. When Open MPI is compiled with CUDA support, comm->c_coll->coll_xxx_module is coll_cuda_module and HAN_LOAD_FALLBACK_COLLECTIVE is a no-op. As a result, HAN's collective functions can be called even if HAN has been disabled, which results in an infinitely recursive call loop. To address this issue, this patch makes HAN's collective functions call the fallback function when the HAN module is disabled. Signed-off-by: Wei Zhang <wzam@amazon.com>
1 parent da6cb6b commit ffab0a4

File tree

1 file changed

+28
-0
lines changed

1 file changed

+28
-0
lines changed

ompi/mca/coll/han/coll_han_dynamic.c

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -614,6 +614,11 @@ mca_coll_han_allreduce_intra_dynamic(const void *sbuf,
614614
size_t dtype_size;
615615
int rank, verbosity = 0;
616616

617+
if (!han_module->enabled) {
618+
return han_module->fallback.allreduce.module_fn.allreduce(sbuf, rbuf, count, dtype, op, comm,
619+
han_module->fallback.allreduce.module);
620+
}
621+
617622
/* Compute configuration information for dynamic rules */
618623
ompi_datatype_type_size(dtype, &dtype_size);
619624
dtype_size = dtype_size * count;
@@ -722,6 +727,9 @@ mca_coll_han_barrier_intra_dynamic(struct ompi_communicator_t *comm,
722727
mca_coll_base_module_t *sub_module;
723728
int rank, verbosity = 0;
724729

730+
if (!han_module->enabled) {
731+
return han_module->fallback.barrier.module_fn.barrier(comm, han_module->fallback.barrier.module);
732+
}
725733

726734
/* Compute configuration information for dynamic rules */
727735
sub_module = get_module(BARRIER,
@@ -821,6 +829,11 @@ mca_coll_han_bcast_intra_dynamic(void *buff,
821829
size_t dtype_size;
822830
int rank, verbosity = 0;
823831

832+
if (!han_module->enabled) {
833+
return han_module->fallback.bcast.module_fn.bcast(buff, count, dtype, root, comm,
834+
han_module->fallback.bcast.module);
835+
}
836+
824837
/* Compute configuration information for dynamic rules */
825838
ompi_datatype_type_size(dtype, &dtype_size);
826839
dtype_size = dtype_size * count;
@@ -932,6 +945,11 @@ mca_coll_han_gather_intra_dynamic(const void *sbuf, int scount,
932945
size_t dtype_size;
933946
int rank, verbosity = 0;
934947

948+
if (!han_module->enabled) {
949+
return han_module->fallback.gather.module_fn.gather(sbuf, scount, sdtype, rbuf, rcount, rdtype, root, comm,
950+
han_module->fallback.gather.module);
951+
}
952+
935953
/* Compute configuration information for dynamic rules */
936954
if( MPI_IN_PLACE != sbuf ) {
937955
ompi_datatype_type_size(sdtype, &dtype_size);
@@ -1051,6 +1069,11 @@ mca_coll_han_reduce_intra_dynamic(const void *sbuf,
10511069
size_t dtype_size;
10521070
int rank, verbosity = 0;
10531071

1072+
if (!han_module->enabled) {
1073+
return han_module->fallback.reduce.module_fn.reduce(sbuf, rbuf, count, dtype, op, root, comm,
1074+
han_module->fallback.reduce.module);
1075+
}
1076+
10541077
/* Compute configuration information for dynamic rules */
10551078
ompi_datatype_type_size(dtype, &dtype_size);
10561079
dtype_size = dtype_size * count;
@@ -1167,6 +1190,11 @@ mca_coll_han_scatter_intra_dynamic(const void *sbuf, int scount,
11671190
size_t dtype_size;
11681191
int rank, verbosity = 0;
11691192

1193+
if (!han_module->enabled) {
1194+
return han_module->fallback.scatter.module_fn.scatter(sbuf, scount, sdtype, rbuf, rcount, rdtype, root, comm,
1195+
han_module->fallback.scatter.module);
1196+
}
1197+
11701198
/* Compute configuration information for dynamic rules */
11711199
if( MPI_IN_PLACE != rbuf ) {
11721200
ompi_datatype_type_size(rdtype, &dtype_size);

0 commit comments

Comments
 (0)