Skip to content

Commit b0429d2

Browse files
committed
coll/libnbc: add knomial tree algorithm for MPI_Ibcast
Signed-off-by: Mikhail Kurnosov <mkurnosov@gmail.com>
1 parent b0e6d1f commit b0429d2

File tree

3 files changed

+111
-13
lines changed

3 files changed

+111
-13
lines changed

ompi/mca/coll/libnbc/coll_libnbc.h

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -70,6 +70,8 @@ BEGIN_C_DECLS
7070
#define NBC_NUM_COLL 17
7171

7272
extern bool libnbc_ibcast_skip_dt_decision;
73+
extern int libnbc_ibcast_algorithm;
74+
extern int libnbc_ibcast_knomial_radix;
7375
extern int libnbc_iexscan_algorithm;
7476
extern int libnbc_iscan_algorithm;
7577

ompi/mca/coll/libnbc/coll_libnbc_component.c

Lines changed: 29 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -46,6 +46,17 @@ static int libnbc_priority = 10;
4646
static bool libnbc_in_progress = false; /* protect from recursive calls */
4747
bool libnbc_ibcast_skip_dt_decision = true;
4848

49+
int libnbc_ibcast_algorithm = 0; /* ibcast user forced algorithm */
50+
int libnbc_ibcast_knomial_radix = 4;
51+
static mca_base_var_enum_value_t ibcast_algorithms[] = {
52+
{0, "ignore"},
53+
{1, "linear"},
54+
{2, "binomial"},
55+
{3, "chain"},
56+
{4, "knomial"},
57+
{0, NULL}
58+
};
59+
4960
int libnbc_iexscan_algorithm = 0; /* iexscan user forced algorithm */
5061
static mca_base_var_enum_value_t iexscan_algorithms[] = {
5162
{0, "ignore"},
@@ -175,6 +186,24 @@ libnbc_register(void)
175186
MCA_BASE_VAR_SCOPE_READONLY,
176187
&libnbc_ibcast_skip_dt_decision);
177188

189+
libnbc_ibcast_algorithm = 0;
190+
(void) mca_base_var_enum_create("coll_libnbc_ibcast_algorithms", ibcast_algorithms, &new_enum);
191+
mca_base_component_var_register(&mca_coll_libnbc_component.super.collm_version,
192+
"ibcast_algorithm",
193+
"Which ibcast algorithm is used: 0 ignore, 1 linear, 2 binomial, 3 chain, 4 knomial",
194+
MCA_BASE_VAR_TYPE_INT, new_enum, 0, MCA_BASE_VAR_FLAG_SETTABLE,
195+
OPAL_INFO_LVL_5, MCA_BASE_VAR_SCOPE_ALL,
196+
&libnbc_ibcast_algorithm);
197+
OBJ_RELEASE(new_enum);
198+
199+
libnbc_ibcast_knomial_radix = 4;
200+
(void) mca_base_component_var_register(&mca_coll_libnbc_component.super.collm_version,
201+
"ibcast_knomial_radix", "k-nomial tree radix for the ibcast algorithm (radix > 1)",
202+
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
203+
OPAL_INFO_LVL_9,
204+
MCA_BASE_VAR_SCOPE_READONLY,
205+
&libnbc_ibcast_knomial_radix);
206+
178207
libnbc_iexscan_algorithm = 0;
179208
(void) mca_base_var_enum_create("coll_libnbc_iexscan_algorithms", iexscan_algorithms, &new_enum);
180209
mca_base_component_var_register(&mca_coll_libnbc_component.super.collm_version,

ompi/mca/coll/libnbc/nbc_ibcast.c

Lines changed: 80 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -26,6 +26,8 @@ static inline int bcast_sched_linear(int rank, int p, int root, NBC_Schedule *sc
2626
MPI_Datatype datatype);
2727
static inline int bcast_sched_chain(int rank, int p, int root, NBC_Schedule *schedule, void *buffer, int count,
2828
MPI_Datatype datatype, int fragsize, size_t size);
29+
static inline int bcast_sched_knomial(int rank, int comm_size, int root, NBC_Schedule *schedule, void *buf,
30+
int count, MPI_Datatype datatype, int knomial_radix);
2931

3032
#ifdef NBC_CACHE_SCHEDULE
3133
/* tree comparison function for schedule cache */
@@ -55,7 +57,7 @@ static int nbc_bcast_init(void *buffer, int count, MPI_Datatype datatype, int ro
5557
#ifdef NBC_CACHE_SCHEDULE
5658
NBC_Bcast_args *args, *found, search;
5759
#endif
58-
enum { NBC_BCAST_LINEAR, NBC_BCAST_BINOMIAL, NBC_BCAST_CHAIN } alg;
60+
enum { NBC_BCAST_LINEAR, NBC_BCAST_BINOMIAL, NBC_BCAST_CHAIN, NBC_BCAST_KNOMIAL } alg;
5961
ompi_coll_libnbc_module_t *libnbc_module = (ompi_coll_libnbc_module_t*) module;
6062

6163
rank = ompi_comm_rank (comm);
@@ -73,25 +75,40 @@ static int nbc_bcast_init(void *buffer, int count, MPI_Datatype datatype, int ro
7375

7476
segsize = 16384;
7577
/* algorithm selection */
76-
if( libnbc_ibcast_skip_dt_decision ) {
77-
if (p <= 4) {
78-
alg = NBC_BCAST_LINEAR;
78+
if (libnbc_ibcast_algorithm == 0) {
79+
if( libnbc_ibcast_skip_dt_decision ) {
80+
if (p <= 4) {
81+
alg = NBC_BCAST_LINEAR;
82+
}
83+
else {
84+
alg = NBC_BCAST_BINOMIAL;
85+
}
7986
}
8087
else {
81-
alg = NBC_BCAST_BINOMIAL;
88+
if (p <= 4) {
89+
alg = NBC_BCAST_LINEAR;
90+
} else if (size * count < 65536) {
91+
alg = NBC_BCAST_BINOMIAL;
92+
} else if (size * count < 524288) {
93+
alg = NBC_BCAST_CHAIN;
94+
segsize = 8192;
95+
} else {
96+
alg = NBC_BCAST_CHAIN;
97+
segsize = 32768;
98+
}
8299
}
83-
}
84-
else {
85-
if (p <= 4) {
100+
} else {
101+
/* user forced dynamic decision */
102+
if (libnbc_ibcast_algorithm == 1) {
86103
alg = NBC_BCAST_LINEAR;
87-
} else if (size * count < 65536) {
104+
} else if (libnbc_ibcast_algorithm == 2) {
88105
alg = NBC_BCAST_BINOMIAL;
89-
} else if (size * count < 524288) {
106+
} else if (libnbc_ibcast_algorithm == 3) {
90107
alg = NBC_BCAST_CHAIN;
91-
segsize = 8192;
108+
} else if (libnbc_ibcast_algorithm == 4 && libnbc_ibcast_knomial_radix > 1) {
109+
alg = NBC_BCAST_KNOMIAL;
92110
} else {
93-
alg = NBC_BCAST_CHAIN;
94-
segsize = 32768;
111+
alg = NBC_BCAST_LINEAR;
95112
}
96113
}
97114

@@ -119,6 +136,9 @@ static int nbc_bcast_init(void *buffer, int count, MPI_Datatype datatype, int ro
119136
case NBC_BCAST_CHAIN:
120137
res = bcast_sched_chain(rank, p, root, schedule, buffer, count, datatype, segsize, size);
121138
break;
139+
case NBC_BCAST_KNOMIAL:
140+
res = bcast_sched_knomial(rank, p, root, schedule, buffer, count, datatype, libnbc_ibcast_knomial_radix);
141+
break;
122142
}
123143

124144
if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) {
@@ -342,6 +362,53 @@ static inline int bcast_sched_chain(int rank, int p, int root, NBC_Schedule *sch
342362
return OMPI_SUCCESS;
343363
}
344364

365+
/*
366+
* bcast_sched_knomial:
367+
*
368+
* Description: an implementation of Ibcast using k-nomial tree algorithm
369+
*
370+
* Time: (radix - 1)O(log_{radix}(comm_size))
371+
* Memory: O(reqs_max)
372+
* Schedule length (rounds): O(log(comm_size))
373+
*/
374+
static inline int bcast_sched_knomial(
375+
int rank, int comm_size, int root, NBC_Schedule *schedule, void *buf,
376+
int count, MPI_Datatype datatype, int knomial_radix)
377+
{
378+
int res = OMPI_SUCCESS;
379+
380+
/* Receive from parent */
381+
int vrank = (rank - root + comm_size) % comm_size;
382+
int mask = 0x1;
383+
while (mask < comm_size) {
384+
if (vrank % (knomial_radix * mask)) {
385+
int parent = vrank / (knomial_radix * mask) * (knomial_radix * mask);
386+
parent = (parent + root) % comm_size;
387+
res = NBC_Sched_recv(buf, false, count, datatype, parent, schedule, true);
388+
if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { goto cleanup_and_return; }
389+
break;
390+
}
391+
mask *= knomial_radix;
392+
}
393+
mask /= knomial_radix;
394+
395+
/* Send data to all children */
396+
while (mask > 0) {
397+
for (int r = 1; r < knomial_radix; r++) {
398+
int child = vrank + mask * r;
399+
if (child < comm_size) {
400+
child = (child + root) % comm_size;
401+
res = NBC_Sched_send(buf, false, count, datatype, child, schedule, false);
402+
if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { goto cleanup_and_return; }
403+
}
404+
}
405+
mask /= knomial_radix;
406+
}
407+
408+
cleanup_and_return:
409+
return res;
410+
}
411+
345412
static int nbc_bcast_inter_init(void *buffer, int count, MPI_Datatype datatype, int root,
346413
struct ompi_communicator_t *comm, ompi_request_t ** request,
347414
struct mca_coll_base_module_2_3_0_t *module, bool persistent) {

0 commit comments

Comments
 (0)