
Commit 07302f6

amd-nithyavs authored and MithunMohanKadavil committed
coll/acoll: A few fixes and fallbacks
Code changes to remove a few Coverity scan warnings and to add fallbacks to base algorithms for custom datatypes.

Signed-off-by: Nithya V S <Nithya.VS@amd.com>
1 parent f8deca0 commit 07302f6

File tree

7 files changed: +65 −29 lines

ompi/mca/coll/acoll/README
Lines changed: 2 additions & 2 deletions

@@ -8,9 +8,9 @@ $HEADER$

 ===========================================================================

-The collective component, AMD Coll (“acoll”), is a high-performant MPI collective component for the OpenMPI library that is optimized for AMD "Zen"-based processors. “acoll” is optimized for communications within a single node of AMD “Zen”-based processors and provides the following commonly used collective algorithms: boardcast (MPI_Bcast), allreduce (MPI_Allreduce), reduce (MPI_Reduce), gather (MPI_Gather), allgather (MPI_Allgather), and barrier (MPI_Barrier).
+The collective component, AMD Coll (“acoll”), is a high-performant MPI collective component for the OpenMPI library that is optimized for AMD "Zen"-based processors. “acoll” is optimized for communications within a single node of AMD “Zen”-based processors and provides the following commonly used collective algorithms: boardcast (MPI_Bcast), allreduce (MPI_Allreduce), reduce (MPI_Reduce), gather (MPI_Gather), allgather (MPI_Allgather), alltoall (MPI_Alltoall), and barrier (MPI_Barrier).

-At present, “acoll” has been tested with OpenMPI v5.0.2 and can be built as part of OpenMPI.
+At present, “acoll” has been tested with OpenMPI main branch and can be built as part of OpenMPI.

 To run an application with acoll, use the following command line parameters
 - mpirun <common mpi runtime parameters> --mca coll acoll,tuned,libnbc,basic --mca coll_acoll_priority 40 <executable>

ompi/mca/coll/acoll/coll_acoll_allgather.c
Lines changed: 2 additions & 2 deletions

@@ -29,7 +29,7 @@ static inline int log_sg_bcast_intra(void *buff, size_t count, struct ompi_datat
                                      mca_coll_base_module_t *module, ompi_request_t **preq,
                                      int *nreqs)
 {
-    int msb_pos, sub_rank, peer, err;
+    int msb_pos, sub_rank, peer, err = MPI_SUCCESS;
     int i, mask;
     int end_sg, end_peer;

@@ -92,7 +92,7 @@ static inline int lin_sg_bcast_intra(void *buff, size_t count, struct ompi_datat
                                      int *nreqs)
 {
     int peer;
-    int err;
+    int err = MPI_SUCCESS;
     int sg_end;

     sg_end = sg_start + sg_size - 1;

ompi/mca/coll/acoll/coll_acoll_allreduce.c
Lines changed: 19 additions & 5 deletions

@@ -450,7 +450,21 @@ int mca_coll_acoll_allreduce_intra(const void *sbuf, void *rbuf, size_t count,
     ompi_datatype_type_size(dtype, &dsize);
     total_dsize = dsize * count;

-    if (1 == size) {
+    /* Disable shm/xpmem based optimizations if: */
+    /* - datatype is not a predefined type */
+    /* - it's a gpu buffer */
+    uint64_t flags = 0;
+    int dev_id;
+    bool is_opt = true;
+    if (!OMPI_COMM_CHECK_ASSERT_NO_ACCEL_BUF(comm)) {
+        if (!ompi_datatype_is_predefined(dtype)
+            || (0 < opal_accelerator.check_addr(sbuf, &dev_id, &flags))
+            || (0 < opal_accelerator.check_addr(rbuf, &dev_id, &flags))) {
+            is_opt = false;
+        }
+    }
+
+    if ((1 == size) && is_opt) {
         if (MPI_IN_PLACE != sbuf) {
             memcpy((char *) rbuf, sbuf, total_dsize);
         }
@@ -486,7 +500,7 @@ int mca_coll_acoll_allreduce_intra(const void *sbuf, void *rbuf, size_t count,
         if (total_dsize < 32) {
             return ompi_coll_base_allreduce_intra_recursivedoubling(sbuf, rbuf, count, dtype, op,
                                                                     comm, module);
-        } else if (total_dsize < 512) {
+        } else if ((total_dsize < 512) && is_opt) {
             return mca_coll_acoll_allreduce_small_msgs_h(sbuf, rbuf, count, dtype, op, comm, module,
                                                          subc, 1);
         } else if (total_dsize <= 2048) {
@@ -505,7 +519,7 @@ int mca_coll_acoll_allreduce_intra(const void *sbuf, void *rbuf, size_t count,
         }
     } else if (total_dsize < 4194304) {
 #ifdef HAVE_XPMEM_H
-        if (((subc->xpmem_use_sr_buf != 0) || (subc->xpmem_buf_size > 2 * total_dsize)) && (subc->without_xpmem != 1)) {
+        if (((subc->xpmem_use_sr_buf != 0) || (subc->xpmem_buf_size > 2 * total_dsize)) && (subc->without_xpmem != 1) && is_opt) {
             return mca_coll_acoll_allreduce_xpmem_f(sbuf, rbuf, count, dtype, op, comm, module, subc);
         } else {
             return ompi_coll_base_allreduce_intra_redscat_allgather(sbuf, rbuf, count, dtype,
@@ -517,7 +531,7 @@ int mca_coll_acoll_allreduce_intra(const void *sbuf, void *rbuf, size_t count,
 #endif
     } else if (total_dsize <= 16777216) {
 #ifdef HAVE_XPMEM_H
-        if (((subc->xpmem_use_sr_buf != 0) || (subc->xpmem_buf_size > 2 * total_dsize)) && (subc->without_xpmem != 1)) {
+        if (((subc->xpmem_use_sr_buf != 0) || (subc->xpmem_buf_size > 2 * total_dsize)) && (subc->without_xpmem != 1) && is_opt) {
             mca_coll_acoll_reduce_xpmem_h(sbuf, rbuf, count, dtype, op, comm, module, subc);
             return mca_coll_acoll_bcast(rbuf, count, dtype, 0, comm, module);
         } else {
@@ -530,7 +544,7 @@ int mca_coll_acoll_allreduce_intra(const void *sbuf, void *rbuf, size_t count,
 #endif
     } else {
 #ifdef HAVE_XPMEM_H
-        if (((subc->xpmem_use_sr_buf != 0) || (subc->xpmem_buf_size > 2 * total_dsize)) && (subc->without_xpmem != 1)) {
+        if (((subc->xpmem_use_sr_buf != 0) || (subc->xpmem_buf_size > 2 * total_dsize)) && (subc->without_xpmem != 1) && is_opt) {
             return mca_coll_acoll_allreduce_xpmem_f(sbuf, rbuf, count, dtype, op, comm, module, subc);
         } else {
             return ompi_coll_base_allreduce_intra_redscat_allgather(sbuf, rbuf, count, dtype,
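
The guard added above (and repeated in the bcast and reduce changes below) is the core of the fallback logic: the shared-memory and XPMEM fast paths assume host-resident buffers of a predefined datatype, and anything else is routed to the base algorithms. The standalone sketch below models only that decision; `datatype_is_predefined()` and `buffer_is_on_gpu()` are hypothetical stand-ins for the component's `ompi_datatype_is_predefined()` and `opal_accelerator.check_addr()` calls, not real OMPI API.

```c
#include <stdbool.h>
#include <stdio.h>

/* Hypothetical stand-ins: in the component these query the datatype
 * engine and the accelerator framework. */
static bool datatype_is_predefined(int dtype_id) { return dtype_id < 64; }
static bool buffer_is_on_gpu(const void *buf)    { (void) buf; return false; }

/* Decide whether the shm/XPMEM fast paths may be used. */
static bool can_use_shm_opt(const void *sbuf, const void *rbuf, int dtype_id)
{
    if (!datatype_is_predefined(dtype_id)) {
        return false;   /* derived/custom datatype: fall back to base */
    }
    if (buffer_is_on_gpu(sbuf) || buffer_is_on_gpu(rbuf)) {
        return false;   /* GPU buffer: fall back to base */
    }
    return true;        /* host buffer of a predefined type: optimize */
}

int main(void)
{
    double sbuf[4] = {0}, rbuf[4] = {0};
    bool is_opt = can_use_shm_opt(sbuf, rbuf, /* dtype_id = */ 17);
    printf("use shm/xpmem path: %s\n", is_opt ? "yes" : "no");
    return 0;
}
```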

ompi/mca/coll/acoll/coll_acoll_alltoall.c
Lines changed: 2 additions & 8 deletions

@@ -529,14 +529,8 @@ int mca_coll_acoll_alltoall
     struct ompi_communicator_t *split_comm;

     /* Select the right split_comm. */
-    int pow2_idx = -2;
-    int tmp_grp_split_f = grp_split_f;
-    while (tmp_grp_split_f > 0)
-    {
-        pow2_idx += 1;
-        tmp_grp_split_f = tmp_grp_split_f / 2;
-    }
-    split_comm = subc->split_comm[pow2_idx];
+    int comm_idx = grp_split_f > 2 ? opal_cube_dim(grp_split_f/2) : 0;
+    split_comm = subc->split_comm[comm_idx];

     error = mca_coll_acoll_base_alltoall_dispatcher
                 (sbuf, (grp_split_f * scount), sdtype,
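
The replacement above folds the halving loop into a single expression. For power-of-two group sizes (an assumption of this sketch), `opal_cube_dim(grp_split_f / 2)`, which returns the smallest d with 2^d >= value, lands on the same index the removed loop produced. A standalone comparison, with `cube_dim()` re-implemented locally rather than pulled from OpenMPI's own `opal_cube_dim()`:

```c
#include <assert.h>
#include <stdio.h>

/* Local re-implementation of what opal_cube_dim() computes:
 * the smallest d such that (1 << d) >= value. */
static int cube_dim(int value)
{
    int dim = 0;
    for (int size = 1; size < value; size <<= 1) {
        ++dim;
    }
    return dim;
}

/* The loop the commit removes: counts halvings of grp_split_f. */
static int old_pow2_idx(int grp_split_f)
{
    int pow2_idx = -2;
    while (grp_split_f > 0) {
        pow2_idx += 1;
        grp_split_f /= 2;
    }
    return pow2_idx;
}

/* The replacement expression from the commit. */
static int new_comm_idx(int grp_split_f)
{
    return grp_split_f > 2 ? cube_dim(grp_split_f / 2) : 0;
}

int main(void)
{
    /* Assumption of this sketch: grp_split_f is a power of two >= 2. */
    for (int g = 2; g <= 1024; g *= 2) {
        printf("grp_split_f=%4d  old=%d  new=%d\n",
               g, old_pow2_idx(g), new_comm_idx(g));
        assert(old_pow2_idx(g) == new_comm_idx(g));
    }
    return 0;
}
```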

ompi/mca/coll/acoll/coll_acoll_bcast.c
Lines changed: 12 additions & 0 deletions

@@ -686,6 +686,18 @@ int mca_coll_acoll_bcast(void *buff, size_t count, struct ompi_datatype_t *datat
     if (size <= 2)
         no_sg = 1;

+    /* Disable shm based bcast if: */
+    /* - datatype is not a predefined type */
+    /* - it's a gpu buffer */
+    uint64_t flags = 0;
+    int dev_id;
+    if (!OMPI_COMM_CHECK_ASSERT_NO_ACCEL_BUF(comm)) {
+        if (!ompi_datatype_is_predefined(datatype)
+            || (0 < opal_accelerator.check_addr(buff, &dev_id, &flags))) {
+            use_shm = 0;
+        }
+    }
+
     coll_acoll_bcast_subcomms(comm, subc, subcomms, subc_roots, root, num_nodes, use_0, no_sg,
                               use_numa, use_socket);

ompi/mca/coll/acoll/coll_acoll_reduce.c
Lines changed: 19 additions & 5 deletions

@@ -119,7 +119,7 @@ static inline int coll_acoll_reduce_topo(const void *sbuf, void *rbuf, size_t co
                                          MCA_COLL_BASE_TAG_REDUCE, MCA_PML_BASE_SEND_STANDARD,
                                          subc->base_comm[ind1][ind2]));
             if (ret != MPI_SUCCESS) {
-                free(pml_buffer);
+                free(free_buffer);
                 if (NULL != tmp_rbuf) {
                     coll_acoll_buf_free(reserve_mem_rbuf_reduce, tmp_rbuf);
                 }
@@ -134,7 +134,7 @@ static inline int coll_acoll_reduce_topo(const void *sbuf, void *rbuf, size_t co
             ret = MCA_PML_CALL(recv(pml_buffer, count, dtype, i, MCA_COLL_BASE_TAG_REDUCE,
                                     subc->base_comm[ind1][ind2], MPI_STATUS_IGNORE));
             if (ret != MPI_SUCCESS) {
-                free(pml_buffer);
+                free(free_buffer);
                 return ret;
             }
             ompi_op_reduce(op, pml_buffer, rbuf, count, dtype);
@@ -143,8 +143,8 @@ static inline int coll_acoll_reduce_topo(const void *sbuf, void *rbuf, size_t co
     }

     /* if local root, reduce at root */
-    if (is_base && (sz > 1)) {
-        free(pml_buffer);
+    if (is_base) {
+        free(free_buffer);
         if (rank != root && NULL != tmp_rbuf) {
             coll_acoll_buf_free(reserve_mem_rbuf_reduce, tmp_rbuf);
         }
@@ -329,6 +329,20 @@ int mca_coll_acoll_reduce_intra(const void *sbuf, void *rbuf, size_t count,
                                             module, 0, 0);
     }

+    /* Disable shm/xpmem based optimizations if: */
+    /* - datatype is not a predefined type */
+    /* - it's a gpu buffer */
+    uint64_t flags = 0;
+    int dev_id;
+    bool is_opt = true;
+    if (!OMPI_COMM_CHECK_ASSERT_NO_ACCEL_BUF(comm)) {
+        if (!ompi_datatype_is_predefined(dtype)
+            || (0 < opal_accelerator.check_addr(sbuf, &dev_id, &flags))
+            || (0 < opal_accelerator.check_addr(rbuf, &dev_id, &flags))) {
+            is_opt = false;
+        }
+    }
+
     ompi_datatype_type_size(dtype, &dsize);
     total_dsize = dsize * count;

@@ -374,7 +388,7 @@ int mca_coll_acoll_reduce_intra(const void *sbuf, void *rbuf, size_t count,
              && (acoll_module->reserve_mem_s).reserve_mem_allocate
              && ((acoll_module->reserve_mem_s).reserve_mem_size >= total_dsize))
             || ((0 == subc->xpmem_use_sr_buf) && (subc->xpmem_buf_size > 2 * total_dsize)))
-            && (subc->without_xpmem != 1)) {
+            && (subc->without_xpmem != 1) && is_opt) {
            return mca_coll_acoll_reduce_xpmem(sbuf, rbuf, count, dtype, op, root, comm,
                                               module, subc);
        } else {
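
The `free(pml_buffer)` to `free(free_buffer)` changes follow the usual OpenMPI scratch-buffer convention, where `free_buffer` holds the pointer returned by `malloc()` and `pml_buffer` is an adjusted view into it; releasing the adjusted pointer on the error paths is the class of defect Coverity flags. A minimal standalone sketch of that allocate/adjust/free discipline, with a hypothetical `pad` offset standing in for the datatype lower-bound adjustment the component computes:

```c
#include <stdlib.h>
#include <string.h>

int main(void)
{
    /* Hypothetical sizes standing in for the datatype true extent and
     * lower-bound gap derived from the datatype in the real code. */
    const size_t span = 1024;
    const size_t pad  = 16;

    /* free_buffer owns the allocation... */
    char *free_buffer = malloc(span + pad);
    if (NULL == free_buffer) {
        return 1;
    }

    /* ...while pml_buffer is only an adjusted view into it. */
    char *pml_buffer = free_buffer + pad;
    memset(pml_buffer, 0, span);   /* work happens through pml_buffer */

    /* Every exit path must release the pointer malloc() returned;
     * free(pml_buffer) here would be invalid, which is exactly what
     * the free(free_buffer) changes above avoid. */
    free(free_buffer);
    return 0;
}
```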

ompi/mca/coll/acoll/coll_acoll_utils.h
Lines changed: 9 additions & 7 deletions

@@ -101,13 +101,15 @@ static inline int check_and_create_subc(ompi_communicator_t *comm,
         if (NULL == acoll_module->subc) {
             return OMPI_ERR_OUT_OF_RESOURCE;
         }
-    }
-
-    /* Check if subcomms structure is already created for the communicator */
-    for (int i = 0; i < num_subc; i++) {
-        if (acoll_module->subc[i]->cid == cid) {
-            *subc_ptr = acoll_module->subc[i];
-            return MPI_SUCCESS;
+    } else {
+        /* Check if subcomms structure is already created for the communicator */
+        for (int i = 0; i < num_subc; i++) {
+            if (NULL != acoll_module->subc[i]) {
+                if (acoll_module->subc[i]->cid == cid) {
+                    *subc_ptr = acoll_module->subc[i];
+                    return MPI_SUCCESS;
+                }
+            }
         }
     }
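
The restructured lookup in check_and_create_subc() only walks the cached subcommunicator table when it already exists, and skips slots that have not been populated, so a freshly allocated or sparse table is never dereferenced. A standalone sketch of that guarded cache lookup, with a hypothetical `subcomm_t` standing in for the component's subcommunicator struct:

```c
#include <stddef.h>
#include <stdio.h>

/* Hypothetical stand-in for the module's cached subcommunicator entries. */
typedef struct subcomm {
    unsigned int cid;
} subcomm_t;

/* Return the cached entry for cid, or NULL on a miss.  Mirrors the
 * restructured control flow: never dereference a missing table or an
 * empty slot. */
static subcomm_t *lookup_subcomm(subcomm_t **cache, int num_subc, unsigned int cid)
{
    if (NULL == cache) {
        return NULL;              /* nothing cached yet */
    }
    for (int i = 0; i < num_subc; i++) {
        if (NULL != cache[i] && cache[i]->cid == cid) {
            return cache[i];      /* hit */
        }
    }
    return NULL;                  /* miss: caller creates the entry */
}

int main(void)
{
    subcomm_t a = { .cid = 7 };
    subcomm_t *table[3] = { NULL, &a, NULL };   /* sparse cache */

    printf("cid 7 cached: %s\n", lookup_subcomm(table, 3, 7) ? "yes" : "no");
    printf("cid 9 cached: %s\n", lookup_subcomm(table, 3, 9) ? "yes" : "no");
    return 0;
}
```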
