@@ -26,6 +26,8 @@ static inline int bcast_sched_linear(int rank, int p, int root, NBC_Schedule *sc
26
26
MPI_Datatype datatype );
27
27
static inline int bcast_sched_chain (int rank , int p , int root , NBC_Schedule * schedule , void * buffer , int count ,
28
28
MPI_Datatype datatype , int fragsize , size_t size );
29
+ static inline int bcast_sched_knomial (int rank , int comm_size , int root , NBC_Schedule * schedule , void * buf ,
30
+ int count , MPI_Datatype datatype , int knomial_radix );
29
31
30
32
#ifdef NBC_CACHE_SCHEDULE
31
33
/* tree comparison function for schedule cache */
@@ -55,7 +57,7 @@ static int nbc_bcast_init(void *buffer, int count, MPI_Datatype datatype, int ro
55
57
#ifdef NBC_CACHE_SCHEDULE
56
58
NBC_Bcast_args * args , * found , search ;
57
59
#endif
58
- enum { NBC_BCAST_LINEAR , NBC_BCAST_BINOMIAL , NBC_BCAST_CHAIN } alg ;
60
+ enum { NBC_BCAST_LINEAR , NBC_BCAST_BINOMIAL , NBC_BCAST_CHAIN , NBC_BCAST_KNOMIAL } alg ;
59
61
ompi_coll_libnbc_module_t * libnbc_module = (ompi_coll_libnbc_module_t * ) module ;
60
62
61
63
rank = ompi_comm_rank (comm );
@@ -73,25 +75,40 @@ static int nbc_bcast_init(void *buffer, int count, MPI_Datatype datatype, int ro
73
75
74
76
segsize = 16384 ;
75
77
/* algorithm selection */
76
- if ( libnbc_ibcast_skip_dt_decision ) {
77
- if (p <= 4 ) {
78
- alg = NBC_BCAST_LINEAR ;
78
+ if (libnbc_ibcast_algorithm == 0 ) {
79
+ if ( libnbc_ibcast_skip_dt_decision ) {
80
+ if (p <= 4 ) {
81
+ alg = NBC_BCAST_LINEAR ;
82
+ }
83
+ else {
84
+ alg = NBC_BCAST_BINOMIAL ;
85
+ }
79
86
}
80
87
else {
81
- alg = NBC_BCAST_BINOMIAL ;
88
+ if (p <= 4 ) {
89
+ alg = NBC_BCAST_LINEAR ;
90
+ } else if (size * count < 65536 ) {
91
+ alg = NBC_BCAST_BINOMIAL ;
92
+ } else if (size * count < 524288 ) {
93
+ alg = NBC_BCAST_CHAIN ;
94
+ segsize = 8192 ;
95
+ } else {
96
+ alg = NBC_BCAST_CHAIN ;
97
+ segsize = 32768 ;
98
+ }
82
99
}
83
- }
84
- else {
85
- if (p <= 4 ) {
100
+ } else {
101
+ /* user forced dynamic decision */
102
+ if (libnbc_ibcast_algorithm == 1 ) {
86
103
alg = NBC_BCAST_LINEAR ;
87
- } else if (size * count < 65536 ) {
104
+ } else if (libnbc_ibcast_algorithm == 2 ) {
88
105
alg = NBC_BCAST_BINOMIAL ;
89
- } else if (size * count < 524288 ) {
106
+ } else if (libnbc_ibcast_algorithm == 3 ) {
90
107
alg = NBC_BCAST_CHAIN ;
91
- segsize = 8192 ;
108
+ } else if (libnbc_ibcast_algorithm == 4 && libnbc_ibcast_knomial_radix > 1 ) {
109
+ alg = NBC_BCAST_KNOMIAL ;
92
110
} else {
93
- alg = NBC_BCAST_CHAIN ;
94
- segsize = 32768 ;
111
+ alg = NBC_BCAST_LINEAR ;
95
112
}
96
113
}
97
114
@@ -119,6 +136,9 @@ static int nbc_bcast_init(void *buffer, int count, MPI_Datatype datatype, int ro
119
136
case NBC_BCAST_CHAIN :
120
137
res = bcast_sched_chain (rank , p , root , schedule , buffer , count , datatype , segsize , size );
121
138
break ;
139
+ case NBC_BCAST_KNOMIAL :
140
+ res = bcast_sched_knomial (rank , p , root , schedule , buffer , count , datatype , libnbc_ibcast_knomial_radix );
141
+ break ;
122
142
}
123
143
124
144
if (OPAL_UNLIKELY (OMPI_SUCCESS != res )) {
@@ -342,6 +362,53 @@ static inline int bcast_sched_chain(int rank, int p, int root, NBC_Schedule *sch
342
362
return OMPI_SUCCESS ;
343
363
}
344
364
365
+ /*
366
+ * bcast_sched_knomial:
367
+ *
368
+ * Description: an implementation of Ibcast using k-nomial tree algorithm
369
+ *
370
+ * Time: (radix - 1)O(log_{radix}(comm_size))
371
+ * Memory: O(reqs_max)
372
+ * Schedule length (rounds): O(log(comm_size))
373
+ */
374
+ static inline int bcast_sched_knomial (
375
+ int rank , int comm_size , int root , NBC_Schedule * schedule , void * buf ,
376
+ int count , MPI_Datatype datatype , int knomial_radix )
377
+ {
378
+ int res = OMPI_SUCCESS ;
379
+
380
+ /* Receive from parent */
381
+ int vrank = (rank - root + comm_size ) % comm_size ;
382
+ int mask = 0x1 ;
383
+ while (mask < comm_size ) {
384
+ if (vrank % (knomial_radix * mask )) {
385
+ int parent = vrank / (knomial_radix * mask ) * (knomial_radix * mask );
386
+ parent = (parent + root ) % comm_size ;
387
+ res = NBC_Sched_recv (buf , false, count , datatype , parent , schedule , true);
388
+ if (OPAL_UNLIKELY (OMPI_SUCCESS != res )) { goto cleanup_and_return ; }
389
+ break ;
390
+ }
391
+ mask *= knomial_radix ;
392
+ }
393
+ mask /= knomial_radix ;
394
+
395
+ /* Send data to all children */
396
+ while (mask > 0 ) {
397
+ for (int r = 1 ; r < knomial_radix ; r ++ ) {
398
+ int child = vrank + mask * r ;
399
+ if (child < comm_size ) {
400
+ child = (child + root ) % comm_size ;
401
+ res = NBC_Sched_send (buf , false, count , datatype , child , schedule , false);
402
+ if (OPAL_UNLIKELY (OMPI_SUCCESS != res )) { goto cleanup_and_return ; }
403
+ }
404
+ }
405
+ mask /= knomial_radix ;
406
+ }
407
+
408
+ cleanup_and_return :
409
+ return res ;
410
+ }
411
+
345
412
static int nbc_bcast_inter_init (void * buffer , int count , MPI_Datatype datatype , int root ,
346
413
struct ompi_communicator_t * comm , ompi_request_t * * request ,
347
414
struct mca_coll_base_module_2_3_0_t * module , bool persistent ) {
0 commit comments