Skip to content

Commit 3960b69

Browse files
committed
coll/base: add support for component name in output
Add the ability to output which component provides which collective operation. The feature is controlled by the mca_coll_base_verbose variable. Specifically:
- mca_coll_base_verbose > 0 and < 20: output is provided for MPI_COMM_WORLD only, and only for the blocking and non-blocking collectives.
- mca_coll_base_verbose = 20: output is provided for all communicators, but only for the blocking and non-blocking collectives.
- mca_coll_base_verbose > 20: output is provided for all communicators and all collectives (including persistent and ft).
Note that the values are up for negotiation. I am also open to using an entirely new MCA parameter that would allow for a more natural specification of which communicator/operation we want the output for. Signed-off-by: Edgar Gabriel <Edgar.Gabriel@amd.com>
1 parent 9fcfec2 commit 3960b69

File tree

1 file changed

+135
-2
lines changed

1 file changed

+135
-2
lines changed

ompi/mca/coll/base/coll_base_comm_select.c

Lines changed: 135 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,7 @@
4141
#include "opal/util/argv.h"
4242
#include "opal/util/show_help.h"
4343
#include "opal/class/opal_list.h"
44+
#include "opal/class/opal_hash_table.h"
4445
#include "opal/class/opal_object.h"
4546
#include "ompi/mca/mca.h"
4647
#include "opal/mca/base/base.h"
@@ -86,6 +87,135 @@ static int query_2_4_0(const mca_coll_base_component_2_4_0_t *
8687
/*
 * Helper for NULL-checking one collective entry of a communicator: stores
 * the stringified "func" token in "what" and, via the comma operator,
 * evaluates to true when (comm)->c_coll->coll_<func> is NULL.
 */
#define CHECK_NULL(what, comm, func) \
8788
( (what) = # func , NULL == (comm)->c_coll->coll_ ## func)
8889

90+
/*
 * Find the name of the component that owns a given collective module.
 *
 * comm   - communicator whose c_coll->module_list is searched
 * module - module pointer to look for (compared by address only)
 * name   - out: set to the ac_component_name of the first list entry whose
 *          ac_module equals "module", or to NULL when no entry matches
 *
 * The returned string is the pointer stored in the list entry, not a copy.
 */
static void mca_coll_base_get_component_name(ompi_communicator_t *comm, void* module, char** name)
{
    mca_coll_base_avail_coll_t *entry;
    char *found = NULL;

    OPAL_LIST_FOREACH(entry, comm->c_coll->module_list, mca_coll_base_avail_coll_t) {
        if (entry->ac_module != module) {
            continue;
        }
        /* first match wins */
        found = (char*) entry->ac_component_name;
        break;
    }

    *name = found;
}
102+
103+
/*
 * Emit one verbose line naming the component that provides collective
 * "func" on communicator "comm".
 *
 * comm      - communicator pointer; its c_coll table, c_name and c_my_rank
 *             are read (the argument is evaluated more than once)
 * func      - bare collective name, token-pasted into coll_<func>_module
 * func_name - string printed as the label of the collective
 *
 * The module pointer is translated into a component name through
 * mca_coll_base_get_component_name(), which yields NULL when no entry of
 * the communicator's module list matches that pointer.  Passing NULL to a
 * "%s" conversion is undefined behavior, so the literal "(null)" is
 * printed instead in that case.
 *
 * The definition intentionally does not end in a semicolon: the caller
 * supplies it, which keeps the macro usable as a single statement inside
 * un-braced if/else constructs.
 *
 * NOTE(review): the message is issued with level 10, while the commit
 * message of this change documents output for verbosity 1 - 19; presumably
 * nothing is printed for verbosity below 10 - confirm which is intended.
 */
#define PRINT_NAME(comm, func, func_name)                                                      \
    do {                                                                                       \
        char *print_name_component = NULL;                                                     \
        mca_coll_base_get_component_name((comm),                                               \
                                         (void*) (comm)->c_coll->coll_ ## func ## _module,     \
                                         &print_name_component);                               \
        opal_output_verbose(10, ompi_coll_base_framework.framework_output,                     \
                            "coll:base:comm_select: communicator %s rank %d %s -> %s",         \
                            (comm)->c_name, (comm)->c_my_rank, (func_name),                    \
                            (NULL != print_name_component) ? print_name_component : "(null)"); \
    } while (0)
110+
111+
/*
 * Print, via PRINT_NAME, the providing component of every blocking
 * collective slot of "comm" (including the neighborhood collectives and
 * reduce_local), one line per collective.
 *
 * The definition does not end in a semicolon; the caller supplies it, so
 * the macro acts as a single statement even in un-braced if/else.
 */
#define PRINT_ALL_BLOCKING(comm)                                          \
    do {                                                                  \
        PRINT_NAME(comm, allgather, "allgather");                         \
        PRINT_NAME(comm, allgatherv, "allgatherv");                       \
        PRINT_NAME(comm, allreduce, "allreduce");                         \
        PRINT_NAME(comm, alltoall, "alltoall");                           \
        PRINT_NAME(comm, alltoallv, "alltoallv");                         \
        PRINT_NAME(comm, alltoallw, "alltoallw");                         \
        PRINT_NAME(comm, barrier, "barrier");                             \
        PRINT_NAME(comm, bcast, "bcast");                                 \
        PRINT_NAME(comm, exscan, "exscan");                               \
        PRINT_NAME(comm, gather, "gather");                               \
        PRINT_NAME(comm, gatherv, "gatherv");                             \
        PRINT_NAME(comm, reduce, "reduce");                               \
        PRINT_NAME(comm, reduce_scatter_block, "reduce_scatter_block");   \
        PRINT_NAME(comm, reduce_scatter, "reduce_scatter");               \
        PRINT_NAME(comm, scan, "scan");                                   \
        PRINT_NAME(comm, scatter, "scatter");                             \
        PRINT_NAME(comm, scatterv, "scatterv");                           \
        PRINT_NAME(comm, neighbor_allgather, "neighbor_allgather");       \
        PRINT_NAME(comm, neighbor_allgatherv, "neighbor_allgatherv");     \
        PRINT_NAME(comm, neighbor_alltoall, "neighbor_alltoall");         \
        PRINT_NAME(comm, neighbor_alltoallv, "neighbor_alltoallv");       \
        PRINT_NAME(comm, neighbor_alltoallw, "neighbor_alltoallw");       \
        PRINT_NAME(comm, reduce_local, "reduce_local");                   \
    } while (0)
137+
138+
/*
 * Print, via PRINT_NAME, the providing component of every non-blocking
 * collective slot of "comm" (including the non-blocking neighborhood
 * collectives), one line per collective.
 *
 * The definition does not end in a semicolon; the caller supplies it, so
 * the macro acts as a single statement even in un-braced if/else.
 */
#define PRINT_ALL_NB(comm)                                                \
    do {                                                                  \
        PRINT_NAME(comm, iallgather, "iallgather");                       \
        PRINT_NAME(comm, iallgatherv, "iallgatherv");                     \
        PRINT_NAME(comm, iallreduce, "iallreduce");                       \
        PRINT_NAME(comm, ialltoall, "ialltoall");                         \
        PRINT_NAME(comm, ialltoallv, "ialltoallv");                       \
        PRINT_NAME(comm, ialltoallw, "ialltoallw");                       \
        PRINT_NAME(comm, ibarrier, "ibarrier");                           \
        PRINT_NAME(comm, ibcast, "ibcast");                               \
        PRINT_NAME(comm, iexscan, "iexscan");                             \
        PRINT_NAME(comm, igather, "igather");                             \
        PRINT_NAME(comm, igatherv, "igatherv");                           \
        PRINT_NAME(comm, ireduce, "ireduce");                             \
        PRINT_NAME(comm, ireduce_scatter_block, "ireduce_scatter_block"); \
        PRINT_NAME(comm, ireduce_scatter, "ireduce_scatter");             \
        PRINT_NAME(comm, iscan, "iscan");                                 \
        PRINT_NAME(comm, iscatter, "iscatter");                           \
        PRINT_NAME(comm, iscatterv, "iscatterv");                         \
        PRINT_NAME(comm, ineighbor_allgather, "ineighbor_allgather");     \
        PRINT_NAME(comm, ineighbor_allgatherv, "ineighbor_allgatherv");   \
        PRINT_NAME(comm, ineighbor_alltoall, "ineighbor_alltoall");       \
        PRINT_NAME(comm, ineighbor_alltoallv, "ineighbor_alltoallv");     \
        PRINT_NAME(comm, ineighbor_alltoallw, "ineighbor_alltoallw");     \
    } while (0)
163+
164+
/*
 * Print, via PRINT_NAME, the providing component of every persistent
 * (*_init) collective slot of "comm" (including the persistent
 * neighborhood collectives), one line per collective.
 *
 * The definition does not end in a semicolon; the caller supplies it, so
 * the macro acts as a single statement even in un-braced if/else.
 */
#define PRINT_ALL_PERSISTENT(comm)                                                \
    do {                                                                          \
        PRINT_NAME(comm, allgather_init, "allgather_init");                       \
        PRINT_NAME(comm, allgatherv_init, "allgatherv_init");                     \
        PRINT_NAME(comm, allreduce_init, "allreduce_init");                       \
        PRINT_NAME(comm, alltoall_init, "alltoall_init");                         \
        PRINT_NAME(comm, alltoallv_init, "alltoallv_init");                       \
        PRINT_NAME(comm, alltoallw_init, "alltoallw_init");                       \
        PRINT_NAME(comm, barrier_init, "barrier_init");                           \
        PRINT_NAME(comm, bcast_init, "bcast_init");                               \
        PRINT_NAME(comm, exscan_init, "exscan_init");                             \
        PRINT_NAME(comm, gather_init, "gather_init");                             \
        PRINT_NAME(comm, gatherv_init, "gatherv_init");                           \
        PRINT_NAME(comm, reduce_init, "reduce_init");                             \
        PRINT_NAME(comm, reduce_scatter_block_init, "reduce_scatter_block_init"); \
        PRINT_NAME(comm, reduce_scatter_init, "reduce_scatter_init");             \
        PRINT_NAME(comm, scan_init, "scan_init");                                 \
        PRINT_NAME(comm, scatter_init, "scatter_init");                           \
        PRINT_NAME(comm, scatterv_init, "scatterv_init");                         \
        PRINT_NAME(comm, neighbor_allgather_init, "neighbor_allgather_init");     \
        PRINT_NAME(comm, neighbor_allgatherv_init, "neighbor_allgatherv_init");   \
        PRINT_NAME(comm, neighbor_alltoall_init, "neighbor_alltoall_init");       \
        PRINT_NAME(comm, neighbor_alltoallv_init, "neighbor_alltoallv_init");     \
        PRINT_NAME(comm, neighbor_alltoallw_init, "neighbor_alltoallw_init");     \
    } while (0)
189+
190+
/*
 * Print, via PRINT_NAME, the providing component of the fault-tolerance
 * collectives (agree and iagree) of "comm".
 *
 * The definition does not end in a semicolon; the caller supplies it, so
 * the macro acts as a single statement even in un-braced if/else.
 */
#define PRINT_ALL_FT(comm)                    \
    do {                                      \
        PRINT_NAME(comm, agree, "agree");     \
        PRINT_NAME(comm, iagree, "iagree");   \
    } while (0)
195+
196+
/*
 * Report which component provides each collective of "comm".
 *
 * What is printed depends on the verbosity of the coll base framework
 * (ompi_coll_base_framework.framework_verbose):
 *
 *   below 20  - blocking and non-blocking collectives, and only when
 *               "comm" is MPI_COMM_WORLD
 *   exactly 20 - blocking and non-blocking collectives of every
 *               communicator
 *   above 20  - additionally the persistent collectives and, when built
 *               with OPAL_ENABLE_FT_MPI, the fault-tolerance ones
 */
static void mca_coll_base_print_component_names(ompi_communicator_t *comm)
{
    /* Communicators other than MPI_COMM_WORLD are reported from level 20 on. */
    if ((MPI_COMM_WORLD != comm) && (ompi_coll_base_framework.framework_verbose < 20)) {
        return;
    }

    PRINT_ALL_BLOCKING (comm);
    PRINT_ALL_NB (comm);

    /* Persistent and ft collectives require a verbosity strictly above 20. */
    if (ompi_coll_base_framework.framework_verbose <= 20) {
        return;
    }

    PRINT_ALL_PERSISTENT (comm);
#if OPAL_ENABLE_FT_MPI
    PRINT_ALL_FT (comm);
#endif
}
218+
89219
/*
90220
* This function is called at the initialization time of every
91221
* communicator. It is used to select which coll component will be
@@ -134,7 +264,6 @@ int mca_coll_base_comm_select(ompi_communicator_t * comm)
134264
NULL != item; item = opal_list_remove_first(selectable)) {
135265

136266
mca_coll_base_avail_coll_t *avail = (mca_coll_base_avail_coll_t *) item;
137-
138267
/* initialize the module */
139268
ret = avail->ac_module->coll_module_enable(avail->ac_module, comm);
140269

@@ -235,7 +364,6 @@ int mca_coll_base_comm_select(ompi_communicator_t * comm)
235364
OBJ_RELEASE(avail);
236365
}
237366
}
238-
239367
/* Done with the list from the check_components() call so release it. */
240368
OBJ_RELEASE(selectable);
241369

@@ -301,6 +429,11 @@ int mca_coll_base_comm_select(ompi_communicator_t * comm)
301429
mca_coll_base_comm_unselect(comm);
302430
return OMPI_ERR_NOT_FOUND;
303431
}
432+
433+
if (ompi_coll_base_framework.framework_verbose > 0) {
434+
mca_coll_base_print_component_names(comm);
435+
}
436+
304437
return OMPI_SUCCESS;
305438
}
306439

0 commit comments

Comments
 (0)