Skip to content

Commit c2970a3

Browse files
bosilca authored and jsquyres committed
Correctly handle non-blocking collectives tags
As it is possible to have multiple outstanding non-blocking collectives provided by different collective modules, we need a consistent mechanism to allow them to select unique tags for each instance of a collective.

Signed-off-by: George Bosilca <bosilca@icl.utk.edu>
1 parent 8582e10 commit c2970a3

File tree

8 files changed

+151
-192
lines changed

8 files changed

+151
-192
lines changed

ompi/communicator/comm_init.c

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@
4040
#include "ompi/constants.h"
4141
#include "ompi/mca/pml/pml.h"
4242
#include "ompi/mca/coll/base/base.h"
43+
#include "ompi/mca/coll/base/coll_tags.h"
4344
#include "ompi/mca/topo/base/base.h"
4445
#include "ompi/runtime/params.h"
4546
#include "ompi/communicator/communicator.h"
@@ -382,9 +383,8 @@ static void ompi_comm_construct(ompi_communicator_t* comm)
382383
comm->c_pml_comm = NULL;
383384
comm->c_topo = NULL;
384385
comm->c_coll = NULL;
385-
comm->c_ibcast_tag = 0;
386-
comm->c_ireduce_tag = 0;
387-
386+
comm->c_nbc_tag = MCA_COLL_BASE_TAG_NONBLOCKING_BASE;
387+
388388
/* A keyhash will be created if/when an attribute is cached on
389389
this communicator */
390390
comm->c_keyhash = NULL;

ompi/communicator/communicator.h

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -188,13 +188,12 @@ struct ompi_communicator_t {
188188
/* Collectives module interface and data */
189189
mca_coll_base_comm_coll_t *c_coll;
190190

191-
/* Non-blocking collective tag. These are added here as they should be
192-
* shared between all non-blocking collective modules (to avoid message
193-
* collisions between them in the case where multiple outstanding
194-
* non-blocking collective coexists using multiple backends).
191+
/* Non-blocking collective tag. These tags might be shared between
192+
* all non-blocking collective modules (to avoid message collision
193+
* between them in the case where multiple outstanding non-blocking
194+
* collectives coexist using multiple backends).
195195
*/
196-
opal_atomic_int32_t c_ibcast_tag;
197-
opal_atomic_int32_t c_ireduce_tag;
196+
opal_atomic_int32_t c_nbc_tag;
198197
};
199198
typedef struct ompi_communicator_t ompi_communicator_t;
200199

ompi/mca/coll/adapt/coll_adapt_algorithms.h

Lines changed: 16 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -25,40 +25,26 @@ int ompi_coll_adapt_ibcast_fini(void);
2525
int ompi_coll_adapt_bcast(BCAST_ARGS);
2626
int ompi_coll_adapt_ibcast(IBCAST_ARGS);
2727
int ompi_coll_adapt_ibcast_generic(IBCAST_ARGS,
28-
ompi_coll_tree_t * tree, size_t seg_size, int ibcast_tag);
29-
int ompi_coll_adapt_ibcast_binomial(IBCAST_ARGS,
30-
int ibcast_tag);
31-
int ompi_coll_adapt_ibcast_in_order_binomial(IBCAST_ARGS,
32-
int ibcast_tag);
33-
int ompi_coll_adapt_ibcast_binary(IBCAST_ARGS,
34-
int ibcast_tag);
35-
int ompi_coll_adapt_ibcast_pipeline(IBCAST_ARGS,
36-
int ibcast_tag);
37-
int ompi_coll_adapt_ibcast_chain(IBCAST_ARGS,
38-
int ibcast_tag);
39-
int ompi_coll_adapt_ibcast_linear(IBCAST_ARGS,
40-
int ibcast_tag);
41-
int ompi_coll_adapt_ibcast_tuned(IBCAST_ARGS,
42-
int ibcast_tag);
28+
ompi_coll_tree_t * tree, size_t seg_size);
29+
int ompi_coll_adapt_ibcast_binomial(IBCAST_ARGS);
30+
int ompi_coll_adapt_ibcast_in_order_binomial(IBCAST_ARGS);
31+
int ompi_coll_adapt_ibcast_binary(IBCAST_ARGS);
32+
int ompi_coll_adapt_ibcast_pipeline(IBCAST_ARGS);
33+
int ompi_coll_adapt_ibcast_chain(IBCAST_ARGS);
34+
int ompi_coll_adapt_ibcast_linear(IBCAST_ARGS);
35+
int ompi_coll_adapt_ibcast_tuned(IBCAST_ARGS);
4336

4437
/* Reduce */
4538
int ompi_coll_adapt_ireduce_register(void);
4639
int ompi_coll_adapt_ireduce_fini(void);
4740
int ompi_coll_adapt_reduce(REDUCE_ARGS);
4841
int ompi_coll_adapt_ireduce(IREDUCE_ARGS);
4942
int ompi_coll_adapt_ireduce_generic(IREDUCE_ARGS,
50-
ompi_coll_tree_t * tree, size_t seg_size, int ireduce_tag);
51-
int ompi_coll_adapt_ireduce_tuned(IREDUCE_ARGS,
52-
int ireduce_tag);
53-
int ompi_coll_adapt_ireduce_binomial(IREDUCE_ARGS,
54-
int ireduce_tag);
55-
int ompi_coll_adapt_ireduce_in_order_binomial(IREDUCE_ARGS,
56-
int ireduce_tag);
57-
int ompi_coll_adapt_ireduce_binary(IREDUCE_ARGS,
58-
int ireduce_tag);
59-
int ompi_coll_adapt_ireduce_pipeline(IREDUCE_ARGS,
60-
int ireduce_tag);
61-
int ompi_coll_adapt_ireduce_chain(IREDUCE_ARGS,
62-
int ireduce_tag);
63-
int ompi_coll_adapt_ireduce_linear(IREDUCE_ARGS,
64-
int ireduce_tag);
43+
ompi_coll_tree_t * tree, size_t seg_size);
44+
int ompi_coll_adapt_ireduce_tuned(IREDUCE_ARGS);
45+
int ompi_coll_adapt_ireduce_binomial(IREDUCE_ARGS);
46+
int ompi_coll_adapt_ireduce_in_order_binomial(IREDUCE_ARGS);
47+
int ompi_coll_adapt_ireduce_binary(IREDUCE_ARGS);
48+
int ompi_coll_adapt_ireduce_pipeline(IREDUCE_ARGS);
49+
int ompi_coll_adapt_ireduce_chain(IREDUCE_ARGS);
50+
int ompi_coll_adapt_ireduce_linear(IREDUCE_ARGS);

ompi/mca/coll/adapt/coll_adapt_ibcast.c

Lines changed: 42 additions & 53 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414
#include "coll_adapt.h"
1515
#include "coll_adapt_algorithms.h"
1616
#include "coll_adapt_context.h"
17-
#include "ompi/mca/coll/base/coll_tags.h"
17+
#include "ompi/mca/coll/base/coll_base_util.h"
1818
#include "ompi/mca/coll/base/coll_base_functions.h"
1919
#include "opal/util/bit_ops.h"
2020
#include "opal/sys/atomic.h"
@@ -27,8 +27,7 @@ typedef int (*ompi_coll_adapt_ibcast_fn_t) (void *buff,
2727
int root,
2828
struct ompi_communicator_t * comm,
2929
ompi_request_t ** request,
30-
mca_coll_base_module_t * module,
31-
int ibcast_tag);
30+
mca_coll_base_module_t * module);
3231

3332
static ompi_coll_adapt_algorithm_index_t ompi_coll_adapt_ibcast_algorithm_index[] = {
3433
{0, (uintptr_t) ompi_coll_adapt_ibcast_tuned},
@@ -158,11 +157,11 @@ static int send_cb(ompi_request_t * req)
158157
"[%d]: Send(start in send cb): segment %d to %d at buff %p send_count %d tag %d\n",
159158
ompi_comm_rank(send_context->con->comm), send_context->frag_id,
160159
send_context->peer, (void *) send_context->buff, send_count,
161-
(send_context->con->ibcast_tag << 16) + new_id));
160+
send_context->con->ibcast_tag - new_id));
162161
err =
163162
MCA_PML_CALL(isend
164163
(send_buff, send_count, send_context->con->datatype, send_context->peer,
165-
(send_context->con->ibcast_tag << 16) + new_id,
164+
send_context->con->ibcast_tag - new_id,
166165
MCA_PML_BASE_SEND_SYNCHRONOUS, send_context->con->comm, &send_req));
167166
if (MPI_SUCCESS != err) {
168167
OPAL_THREAD_UNLOCK(context->con->mutex);
@@ -245,10 +244,10 @@ static int recv_cb(ompi_request_t * req)
245244
"[%d]: Recv(start in recv cb): segment %d from %d at buff %p recv_count %d tag %d\n",
246245
ompi_comm_rank(context->con->comm), context->frag_id, context->peer,
247246
(void *) recv_buff, recv_count,
248-
(recv_context->con->ibcast_tag << 16) + recv_context->frag_id));
247+
recv_context->con->ibcast_tag - recv_context->frag_id));
249248
MCA_PML_CALL(irecv
250249
(recv_buff, recv_count, recv_context->con->datatype, recv_context->peer,
251-
(recv_context->con->ibcast_tag << 16) + recv_context->frag_id,
250+
recv_context->con->ibcast_tag - recv_context->frag_id,
252251
recv_context->con->comm, &recv_req));
253252

254253
/* Invoke receive call back */
@@ -282,12 +281,12 @@ static int recv_cb(ompi_request_t * req)
282281
"[%d]: Send(start in recv cb): segment %d to %d at buff %p send_count %d tag %d\n",
283282
ompi_comm_rank(send_context->con->comm), send_context->frag_id,
284283
send_context->peer, (void *) send_context->buff, send_count,
285-
(send_context->con->ibcast_tag << 16) + send_context->frag_id));
284+
send_context->con->ibcast_tag - send_context->frag_id));
286285
err =
287286
MCA_PML_CALL(isend
288287
(send_buff, send_count, send_context->con->datatype,
289288
send_context->peer,
290-
(send_context->con->ibcast_tag << 16) + send_context->frag_id,
289+
send_context->con->ibcast_tag - send_context->frag_id,
291290
MCA_PML_BASE_SEND_SYNCHRONOUS, send_context->con->comm, &send_req));
292291
if (MPI_SUCCESS != err) {
293292
OPAL_THREAD_UNLOCK(context->con->mutex);
@@ -344,12 +343,10 @@ int ompi_coll_adapt_ibcast(void *buff, int count, struct ompi_datatype_t *dataty
344343
*request = temp_request;
345344
return MPI_SUCCESS;
346345
}
347-
int ibcast_tag = opal_atomic_add_fetch_32(&(comm->c_ibcast_tag), 1);
348-
ibcast_tag = ibcast_tag % 4096;
349346

350347
OPAL_OUTPUT_VERBOSE((10, mca_coll_adapt_component.adapt_output,
351-
"ibcast tag %d root %d, algorithm %d, coll_adapt_ibcast_segment_size %zu, coll_adapt_ibcast_max_send_requests %d, coll_adapt_ibcast_max_recv_requests %d\n",
352-
ibcast_tag, root, mca_coll_adapt_component.adapt_ibcast_algorithm,
348+
"ibcast root %d, algorithm %d, coll_adapt_ibcast_segment_size %zu, coll_adapt_ibcast_max_send_requests %d, coll_adapt_ibcast_max_recv_requests %d\n",
349+
root, mca_coll_adapt_component.adapt_ibcast_algorithm,
353350
mca_coll_adapt_component.adapt_ibcast_segment_size,
354351
mca_coll_adapt_component.adapt_ibcast_max_send_requests,
355352
mca_coll_adapt_component.adapt_ibcast_max_recv_requests));
@@ -358,89 +355,82 @@ int ompi_coll_adapt_ibcast(void *buff, int count, struct ompi_datatype_t *dataty
358355
(ompi_coll_adapt_ibcast_fn_t)
359356
ompi_coll_adapt_ibcast_algorithm_index[mca_coll_adapt_component.adapt_ibcast_algorithm].
360357
algorithm_fn_ptr;
361-
return bcast_func(buff, count, datatype, root, comm, request, module, ibcast_tag);
358+
return bcast_func(buff, count, datatype, root, comm, request, module);
362359
}
363360

364361
/*
365362
* Ibcast functions with different algorithms
366363
*/
367364
int ompi_coll_adapt_ibcast_tuned(void *buff, int count, struct ompi_datatype_t *datatype,
368-
int root, struct ompi_communicator_t *comm,
369-
ompi_request_t ** request,
370-
mca_coll_base_module_t *module, int ibcast_tag)
365+
int root, struct ompi_communicator_t *comm,
366+
ompi_request_t ** request,
367+
mca_coll_base_module_t *module)
371368
{
372369
OPAL_OUTPUT_VERBOSE((10, mca_coll_adapt_component.adapt_output, "tuned not implemented\n"));
373370
return OMPI_ERR_NOT_IMPLEMENTED;
374371
}
375372

376373
int ompi_coll_adapt_ibcast_binomial(void *buff, int count, struct ompi_datatype_t *datatype,
377-
int root, struct ompi_communicator_t *comm,
378-
ompi_request_t ** request, mca_coll_base_module_t * module,
379-
int ibcast_tag)
374+
int root, struct ompi_communicator_t *comm,
375+
ompi_request_t ** request, mca_coll_base_module_t * module)
380376
{
381377
ompi_coll_tree_t *tree = ompi_coll_base_topo_build_bmtree(comm, root);
382378
int err =
383379
ompi_coll_adapt_ibcast_generic(buff, count, datatype, root, comm, request, module, tree,
384-
mca_coll_adapt_component.adapt_ibcast_segment_size,
385-
ibcast_tag);
380+
mca_coll_adapt_component.adapt_ibcast_segment_size);
386381
return err;
387382
}
388383

389384
int ompi_coll_adapt_ibcast_in_order_binomial(void *buff, int count, struct ompi_datatype_t *datatype,
390-
int root, struct ompi_communicator_t *comm,
391-
ompi_request_t ** request,
392-
mca_coll_base_module_t * module, int ibcast_tag)
385+
int root, struct ompi_communicator_t *comm,
386+
ompi_request_t ** request,
387+
mca_coll_base_module_t * module)
393388
{
394389
ompi_coll_tree_t *tree = ompi_coll_base_topo_build_in_order_bmtree(comm, root);
395390
int err =
396391
ompi_coll_adapt_ibcast_generic(buff, count, datatype, root, comm, request, module, tree,
397-
mca_coll_adapt_component.adapt_ibcast_segment_size,
398-
ibcast_tag);
392+
mca_coll_adapt_component.adapt_ibcast_segment_size);
399393
return err;
400394
}
401395

402396

403397
int ompi_coll_adapt_ibcast_binary(void *buff, int count, struct ompi_datatype_t *datatype, int root,
404-
struct ompi_communicator_t *comm, ompi_request_t ** request,
405-
mca_coll_base_module_t * module, int ibcast_tag)
398+
struct ompi_communicator_t *comm, ompi_request_t ** request,
399+
mca_coll_base_module_t * module)
406400
{
407401
ompi_coll_tree_t *tree = ompi_coll_base_topo_build_tree(2, comm, root);
408402
int err =
409403
ompi_coll_adapt_ibcast_generic(buff, count, datatype, root, comm, request, module, tree,
410-
mca_coll_adapt_component.adapt_ibcast_segment_size,
411-
ibcast_tag);
404+
mca_coll_adapt_component.adapt_ibcast_segment_size);
412405
return err;
413406
}
414407

415408
int ompi_coll_adapt_ibcast_pipeline(void *buff, int count, struct ompi_datatype_t *datatype,
416-
int root, struct ompi_communicator_t *comm,
417-
ompi_request_t ** request, mca_coll_base_module_t * module,
418-
int ibcast_tag)
409+
int root, struct ompi_communicator_t *comm,
410+
ompi_request_t ** request, mca_coll_base_module_t * module)
419411
{
420412
ompi_coll_tree_t *tree = ompi_coll_base_topo_build_chain(1, comm, root);
421413
int err =
422414
ompi_coll_adapt_ibcast_generic(buff, count, datatype, root, comm, request, module, tree,
423-
mca_coll_adapt_component.adapt_ibcast_segment_size,
424-
ibcast_tag);
415+
mca_coll_adapt_component.adapt_ibcast_segment_size);
425416
return err;
426417
}
427418

428419

429420
int ompi_coll_adapt_ibcast_chain(void *buff, int count, struct ompi_datatype_t *datatype, int root,
430-
struct ompi_communicator_t *comm, ompi_request_t ** request,
431-
mca_coll_base_module_t * module, int ibcast_tag)
421+
struct ompi_communicator_t *comm, ompi_request_t ** request,
422+
mca_coll_base_module_t * module)
432423
{
433424
ompi_coll_tree_t *tree = ompi_coll_base_topo_build_chain(4, comm, root);
434425
int err =
435426
ompi_coll_adapt_ibcast_generic(buff, count, datatype, root, comm, request, module, tree,
436-
mca_coll_adapt_component.adapt_ibcast_segment_size,
437-
ibcast_tag);
427+
mca_coll_adapt_component.adapt_ibcast_segment_size);
438428
return err;
439429
}
440430

441431
int ompi_coll_adapt_ibcast_linear(void *buff, int count, struct ompi_datatype_t *datatype, int root,
442-
struct ompi_communicator_t *comm, ompi_request_t ** request,
443-
mca_coll_base_module_t * module, int ibcast_tag)
432+
struct ompi_communicator_t *comm, ompi_request_t ** request,
433+
mca_coll_base_module_t * module)
444434
{
445435
int fanout = ompi_comm_size(comm) - 1;
446436
ompi_coll_tree_t *tree;
@@ -453,16 +443,15 @@ int ompi_coll_adapt_ibcast_linear(void *buff, int count, struct ompi_datatype_t
453443
}
454444
int err =
455445
ompi_coll_adapt_ibcast_generic(buff, count, datatype, root, comm, request, module, tree,
456-
mca_coll_adapt_component.adapt_ibcast_segment_size,
457-
ibcast_tag);
446+
mca_coll_adapt_component.adapt_ibcast_segment_size);
458447
return err;
459448
}
460449

461450

462451
int ompi_coll_adapt_ibcast_generic(void *buff, int count, struct ompi_datatype_t *datatype, int root,
463-
struct ompi_communicator_t *comm, ompi_request_t ** request,
464-
mca_coll_base_module_t * module, ompi_coll_tree_t * tree,
465-
size_t seg_size, int ibcast_tag)
452+
struct ompi_communicator_t *comm, ompi_request_t ** request,
453+
mca_coll_base_module_t * module, ompi_coll_tree_t * tree,
454+
size_t seg_size)
466455
{
467456
int i, j, rank, err;
468457
/* The min of num_segs and SEND_NUM or RECV_NUM, in case the num_segs is less than SEND_NUM or RECV_NUM */
@@ -555,11 +544,11 @@ int ompi_coll_adapt_ibcast_generic(void *buff, int count, struct ompi_datatype_t
555544
con->mutex = mutex;
556545
con->request = temp_request;
557546
con->tree = tree;
558-
con->ibcast_tag = ibcast_tag;
547+
con->ibcast_tag = ompi_coll_base_nbc_reserve_tags(comm, num_segs);
559548

560549
OPAL_OUTPUT_VERBOSE((30, mca_coll_adapt_component.adapt_output,
561550
"[%d]: Ibcast, root %d, tag %d\n", rank, root,
562-
ibcast_tag));
551+
con->ibcast_tag));
563552
OPAL_OUTPUT_VERBOSE((30, mca_coll_adapt_component.adapt_output,
564553
"[%d]: con->mutex = %p, num_children = %d, num_segs = %d, real_seg_size = %d, seg_count = %d, tree_adreess = %p\n",
565554
rank, (void *) con->mutex, tree->tree_nextsize, num_segs,
@@ -610,11 +599,11 @@ int ompi_coll_adapt_ibcast_generic(void *buff, int count, struct ompi_datatype_t
610599
OPAL_OUTPUT_VERBOSE((30, mca_coll_adapt_component.adapt_output,
611600
"[%d]: Send(start in main): segment %d to %d at buff %p send_count %d tag %d\n",
612601
rank, context->frag_id, context->peer,
613-
(void *) send_buff, send_count, (ibcast_tag << 16) + i));
602+
(void *) send_buff, send_count, con->ibcast_tag - i));
614603
err =
615604
MCA_PML_CALL(isend
616605
(send_buff, send_count, datatype, context->peer,
617-
(ibcast_tag << 16) + i, MCA_PML_BASE_SEND_SYNCHRONOUS, comm,
606+
con->ibcast_tag - i, MCA_PML_BASE_SEND_SYNCHRONOUS, comm,
618607
&send_req));
619608
if (MPI_SUCCESS != err) {
620609
return err;
@@ -668,11 +657,11 @@ int ompi_coll_adapt_ibcast_generic(void *buff, int count, struct ompi_datatype_t
668657
"[%d]: Recv(start in main): segment %d from %d at buff %p recv_count %d tag %d\n",
669658
ompi_comm_rank(context->con->comm), context->frag_id,
670659
context->peer, (void *) recv_buff, recv_count,
671-
(ibcast_tag << 16) + i));
660+
con->ibcast_tag - i));
672661
err =
673662
MCA_PML_CALL(irecv
674663
(recv_buff, recv_count, datatype, context->peer,
675-
(ibcast_tag << 16) + i, comm, &recv_req));
664+
con->ibcast_tag - i, comm, &recv_req));
676665
if (MPI_SUCCESS != err) {
677666
return err;
678667
}

0 commit comments

Comments
 (0)