Skip to content

Commit ff2fd06

Browse files
author
Sergey Oblomov
committed
OSHMEM/COLL: optimization on zero-length ops
- removed barrier call on zero-length operations

Signed-off-by: Sergey Oblomov <sergeyo@mellanox.com>
1 parent 9de128a commit ff2fd06

File tree

5 files changed

+56
-21
lines changed

5 files changed

+56
-21
lines changed

oshmem/mca/scoll/basic/scoll_basic_alltoall.c

Lines changed: 13 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -61,17 +61,20 @@ int mca_scoll_basic_alltoall(struct oshmem_group_t *group,
6161
return OSHMEM_ERR_BAD_PARAM;
6262
}
6363

64-
if (nelems) {
65-
if ((sst == 1) && (dst == 1)) {
66-
rc = a2a_alg_simple(group, target, source, nelems, element_size);
67-
} else {
68-
rc = a2as_alg_simple(group, target, source, dst, sst, nelems,
69-
element_size);
70-
}
64+
/* Do nothing on zero-length request */
65+
if (OPAL_UNLIKELY(!nelems)) {
66+
return OPAL_SUCCESS;
67+
}
7168

72-
if (rc != OSHMEM_SUCCESS) {
73-
return rc;
74-
}
69+
if ((sst == 1) && (dst == 1)) {
70+
rc = a2a_alg_simple(group, target, source, nelems, element_size);
71+
} else {
72+
rc = a2as_alg_simple(group, target, source, dst, sst, nelems,
73+
element_size);
74+
}
75+
76+
if (rc != OSHMEM_SUCCESS) {
77+
return rc;
7578
}
7679

7780
/* quiet is needed because scoll level barrier does not

oshmem/mca/scoll/basic/scoll_basic_broadcast.c

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,11 @@ int mca_scoll_basic_broadcast(struct oshmem_group_t *group,
5555
if ((rc == OSHMEM_SUCCESS) && oshmem_proc_group_is_member(group)) {
5656
int i = 0;
5757

58+
/* Do nothing on zero-length request */
59+
if (OPAL_UNLIKELY(!nlong)) {
60+
return OSHMEM_SUCCESS;
61+
}
62+
5863
if (pSync) {
5964
alg = (alg == SCOLL_DEFAULT_ALG ?
6065
mca_scoll_basic_param_broadcast_algorithm : alg);
@@ -131,7 +136,7 @@ static int _algorithm_central_counter(struct oshmem_group_t *group,
131136
group->my_pe, pSync[0], PE_root);
132137

133138
/* Check if this PE is the root */
134-
if ((PE_root == group->my_pe) && nlong) {
139+
if (PE_root == group->my_pe) {
135140
int pe_cur = 0;
136141

137142
SCOLL_VERBOSE(14,

oshmem/mca/scoll/basic/scoll_basic_collect.c

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -66,7 +66,12 @@ int mca_scoll_basic_collect(struct oshmem_group_t *group,
6666
if ((rc == OSHMEM_SUCCESS) && oshmem_proc_group_is_member(group)) {
6767
int i = 0;
6868

69-
if (nlong_type && nlong) {
69+
/* Do nothing on zero-length request */
70+
if (OPAL_UNLIKELY(!nlong)) {
71+
return OPAL_SUCCESS;
72+
}
73+
74+
if (nlong_type) {
7075
alg = (alg == SCOLL_DEFAULT_ALG ?
7176
mca_scoll_basic_param_collect_algorithm : alg);
7277
switch (alg) {
@@ -156,7 +161,7 @@ static int _algorithm_f_central_counter(struct oshmem_group_t *group,
156161
group->my_pe);
157162
SCOLL_VERBOSE(15, "[#%d] pSync[0] = %ld", group->my_pe, pSync[0]);
158163

159-
if ((PE_root == group->my_pe) && nlong) {
164+
if (PE_root == group->my_pe) {
160165
int pe_cur = 0;
161166

162167
memcpy((void*) ((unsigned char*) target + 0 * nlong),
@@ -543,7 +548,7 @@ static int _algorithm_central_collector(struct oshmem_group_t *group,
543548
/* Set own data size */
544549
pSync[0] = (nlong ? (long)nlong : SHMEM_SYNC_READY);
545550

546-
if ((PE_root == group->my_pe) && nlong) {
551+
if (PE_root == group->my_pe) {
547552
long value = 0;
548553
int pe_cur = 0;
549554
long wait_pe_count = 0;

oshmem/mca/scoll/basic/scoll_basic_reduce.c

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -78,10 +78,14 @@ int mca_scoll_basic_reduce(struct oshmem_group_t *group,
7878
if ((rc == OSHMEM_SUCCESS) && oshmem_proc_group_is_member(group)) {
7979
int i = 0;
8080

81+
/* Do nothing on zero-length request */
82+
if (OPAL_UNLIKELY(!nlong)) {
83+
return OSHMEM_SUCCESS;
84+
}
85+
8186
if (pSync) {
82-
alg = (nlong ? (alg == SCOLL_DEFAULT_ALG ?
83-
mca_scoll_basic_param_reduce_algorithm : alg) :
84-
SCOLL_ALG_REDUCE_CENTRAL_COUNTER );
87+
alg = (alg == SCOLL_DEFAULT_ALG ?
88+
mca_scoll_basic_param_reduce_algorithm : alg);
8589
switch (alg) {
8690
case SCOLL_ALG_REDUCE_CENTRAL_COUNTER:
8791
{
@@ -186,7 +190,7 @@ static int _algorithm_central_counter(struct oshmem_group_t *group,
186190

187191
SCOLL_VERBOSE(12, "[#%d] Reduce algorithm: Central Counter", group->my_pe);
188192

189-
if ((PE_root == group->my_pe) && nlong) {
193+
if (PE_root == group->my_pe) {
190194
int pe_cur = 0;
191195
void *target_cur = NULL;
192196

oshmem/mca/scoll/mpi/scoll_mpi_ops.c

Lines changed: 21 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -54,14 +54,20 @@ int mca_scoll_mpi_broadcast(struct oshmem_group_t *group,
5454
}
5555
dtype = &ompi_mpi_char.dt;
5656
root = oshmem_proc_group_find_id(group, PE_root);
57+
58+
/* Do nothing on zero-length request */
59+
if (OPAL_UNLIKELY(!nlong)) {
60+
return OSHMEM_SUCCESS;
61+
}
62+
5763
/* Open SHMEM specification has the following constraints (page 85):
5864
* "If using C/C++, nelems must be of type integer. If you are using Fortran, it must be a
5965
* default integer value". And also fortran signature says "INTEGER".
6066
* Since ompi coll components don't support size_t at the moment,
6167
* and considering this contradiction, we cast size_t to int here
6268
* if the value is less than INT_MAX, and fall back to the previous module otherwise. */
6369
#ifdef INCOMPATIBLE_SHMEM_OMPI_COLL_APIS
64-
if ((INT_MAX < nlong) || !nlong) {
70+
if (INT_MAX < nlong) {
6571
MPI_COLL_VERBOSE(20,"RUNNING FALLBACK BCAST");
6672
PREVIOUS_SCOLL_FN(mpi_module, broadcast, group,
6773
PE_root,
@@ -104,7 +110,13 @@ int mca_scoll_mpi_collect(struct oshmem_group_t *group,
104110
void *sbuf, *rbuf;
105111
MPI_COLL_VERBOSE(20,"RUNNING MPI ALLGATHER");
106112
mpi_module = (mca_scoll_mpi_module_t *) group->g_scoll.scoll_collect_module;
107-
if ((nlong_type == true) && nlong) {
113+
114+
/* Do nothing on zero-length request */
115+
if (OPAL_UNLIKELY(!nlong)) {
116+
return OSHMEM_SUCCESS;
117+
}
118+
119+
if (nlong_type == true) {
108120
sbuf = (void *) source;
109121
rbuf = target;
110122
stype = &ompi_mpi_char.dt;
@@ -177,14 +189,20 @@ int mca_scoll_mpi_reduce(struct oshmem_group_t *group,
177189
dtype = shmem_dtype_to_ompi_dtype(op);
178190
h_op = shmem_op_to_ompi_op(op->op);
179191
count = nlong/op->dt_size;
192+
193+
/* Do nothing on zero-length request */
194+
if (OPAL_UNLIKELY(!nlong)) {
195+
return OSHMEM_SUCCESS;
196+
}
197+
180198
/* Open SHMEM specification has the following constraints (page 85):
181199
* "If using C/C++, nelems must be of type integer. If you are using Fortran, it must be a
182200
* default integer value". And also fortran signature says "INTEGER".
183201
* Since ompi coll components don't support size_t at the moment,
184202
* and considering this contradiction, we cast size_t to int here
185203
* if the value is less than INT_MAX, and fall back to the previous module otherwise. */
186204
#ifdef INCOMPATIBLE_SHMEM_OMPI_COLL_APIS
187-
if ((INT_MAX < count) || !nlong) {
205+
if (INT_MAX < count) {
188206
MPI_COLL_VERBOSE(20,"RUNNING FALLBACK REDUCE");
189207
PREVIOUS_SCOLL_FN(mpi_module, reduce, group,
190208
op,

0 commit comments

Comments
 (0)