Skip to content

Commit fb23119

Browse files
Mamzi Bayatpour  mbayatpour@nvidia.com ()janjust
andcommitted
OSC/UCX: Fix coverity issues and missing extend var in nb acc
Signed-off-by: Mamzi Bayatpour <mbayatpour@nvidia.com> Co-authored-by: Tomislav Janjusic <tomislavj@nvidia.com>
1 parent d0bbb75 commit fb23119

File tree

5 files changed

+20
-13
lines changed

5 files changed

+20
-13
lines changed

ompi/mca/osc/ucx/osc_ucx.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -49,12 +49,12 @@ OMPI_DECLSPEC extern ompi_osc_ucx_component_t mca_osc_ucx_component;
4949

5050
#define OSC_UCX_INCREMENT_OUTSTANDING_NB_OPS(_module) \
5151
do { \
52-
opal_atomic_add_fetch_size_t(&_module->ctx->num_incomplete_req_ops, 1); \
52+
opal_atomic_add_fetch_64(&_module->ctx->num_incomplete_req_ops, 1); \
5353
} while(0);
5454

5555
#define OSC_UCX_DECREMENT_OUTSTANDING_NB_OPS(_module) \
5656
do { \
57-
opal_atomic_add_fetch_size_t(&_module->ctx->num_incomplete_req_ops, -1); \
57+
opal_atomic_add_fetch_64(&_module->ctx->num_incomplete_req_ops, -1); \
5858
} while(0);
5959

6060
typedef enum ompi_osc_ucx_epoch {

ompi/mca/osc/ucx/osc_ucx_comm.c

Lines changed: 9 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -263,7 +263,7 @@ static inline int get_dynamic_win_info(uint64_t remote_addr,
263263
OSC_UCX_GET_DEFAULT_EP(ep, module, target);
264264
uint64_t remote_state_addr = (module->state_addrs)[target] + OSC_UCX_STATE_DYNAMIC_WIN_CNT_OFFSET;
265265
size_t remote_state_len = sizeof(uint64_t) + sizeof(ompi_osc_dynamic_win_info_t) * OMPI_OSC_UCX_ATTACH_MAX;
266-
char *temp_buf;
266+
char *temp_buf = NULL;
267267
ompi_osc_dynamic_win_info_t *temp_dynamic_wins;
268268
int contain = 0;
269269
uint64_t win_count;
@@ -870,11 +870,10 @@ static inline int ompi_osc_ucx_acc_rputget(void *stage_addr, int stage_count,
870870
}
871871
}
872872
ucx_req->target = target;
873-
if (target_dt != NULL) {
874-
ucx_req->target_dt = target_dt;
875-
if (!ompi_datatype_is_predefined(target_dt)) {
876-
OBJ_RETAIN(ucx_req->target_dt);
877-
}
873+
assert(target_dt != NULL);
874+
ucx_req->target_dt = target_dt;
875+
if (!ompi_datatype_is_predefined(target_dt)) {
876+
OBJ_RETAIN(ucx_req->target_dt);
878877
}
879878
ucx_req->target_disp = target_disp;
880879
ucx_req->target_count = target_count;
@@ -884,9 +883,11 @@ static inline int ompi_osc_ucx_acc_rputget(void *stage_addr, int stage_count,
884883
module->skip_sync_check = true; /* we already hold the acc lock, so no need for sync check*/
885884

886885
if (is_put) {
886+
assert(origin_dt != NULL);
887887
ret = ompi_osc_ucx_put(origin_addr, origin_count, origin_dt, target, target_disp,
888888
target_count, target_dt, win);
889889
} else {
890+
assert(stage_dt != NULL);
890891
ret = ompi_osc_ucx_get(stage_addr, stage_count, stage_dt, target, target_disp,
891892
target_count, target_dt, win);
892893
}
@@ -1615,7 +1616,7 @@ void ompi_osc_ucx_req_completion(void *request) {
16151616
assert(req->phase != ACC_INIT);
16161617
void *free_addr = NULL;
16171618
bool release_lock = false;
1618-
ptrdiff_t temp_extent;
1619+
ptrdiff_t temp_lb, temp_extent;
16191620
const void *origin_addr = req->origin_addr;
16201621
int origin_count = req->origin_count;
16211622
struct ompi_datatype_t *origin_dt = req->origin_dt;
@@ -1723,6 +1724,7 @@ void ompi_osc_ucx_req_completion(void *request) {
17231724
} else {
17241725
int i;
17251726
void *curr_origin_addr = origin_ucx_iov[origin_ucx_iov_idx].addr;
1727+
ompi_datatype_get_true_extent(temp_dt, &temp_lb, &temp_extent);
17261728
for (i = 0; i < (int)temp_count; i++) {
17271729
ompi_op_reduce(op, curr_origin_addr,
17281730
(void *)((char *)temp_addr + i * temp_extent),

ompi/mca/osc/ucx/osc_ucx_component.c

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -650,6 +650,7 @@ static int component_select(struct ompi_win_t *win, void **base, size_t size, in
650650
module->noncontig_shared_win = false;
651651
if (OMPI_SUCCESS != opal_info_get_bool(info, "alloc_shared_noncontig",
652652
&module->noncontig_shared_win, &flag)) {
653+
free(rbuf);
653654
goto error;
654655
}
655656

@@ -879,6 +880,7 @@ static int component_select(struct ompi_win_t *win, void **base, size_t size, in
879880
if (module->disp_units) free(module->disp_units);
880881
if (module->comm) ompi_comm_free(&module->comm);
881882
free(module);
883+
module = NULL;
882884

883885
error_nomem:
884886
if (env_initialized == true) {
@@ -887,7 +889,7 @@ static int component_select(struct ompi_win_t *win, void **base, size_t size, in
887889
mca_osc_ucx_component.env_initialized = false;
888890
}
889891

890-
if (0 == ompi_comm_rank (module->comm) && unlink_needed) {
892+
if ((NULL != module) && (0 == ompi_comm_rank (module->comm)) && unlink_needed) {
891893
opal_shmem_unlink (&module->seg_ds);
892894
}
893895
ompi_osc_ucx_unregister_progress();
@@ -1082,7 +1084,10 @@ int ompi_osc_ucx_free(struct ompi_win_t *win) {
10821084
}
10831085
OBJ_DESTRUCT(&module->pending_posts);
10841086

1085-
opal_common_ucx_ctx_flush(module->ctx, OPAL_COMMON_UCX_SCOPE_WORKER, 0);
1087+
ret = opal_common_ucx_ctx_flush(module->ctx, OPAL_COMMON_UCX_SCOPE_WORKER, 0);
1088+
if (OPAL_SUCCESS != ret) {
1089+
return ret;
1090+
}
10861091

10871092
ret = module->comm->c_coll->coll_barrier(module->comm,
10881093
module->comm->c_coll->coll_barrier_module);

opal/mca/common/ucx/common_ucx_wpool.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -872,7 +872,7 @@ OPAL_DECLSPEC int opal_common_ucx_ctx_flush(opal_common_ucx_ctx_t *ctx,
872872
}
873873

874874
/* progress the nonblocking operations */
875-
while (ctx->num_incomplete_req_ops != 0) {
875+
while (ctx->num_incomplete_req_ops > 0) {
876876
spin++;
877877
rc = ctx_flush(ctx, OPAL_COMMON_UCX_SCOPE_WORKER, 0);
878878
if (rc != OPAL_SUCCESS) {

opal/mca/common/ucx/common_ucx_wpool.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -88,7 +88,7 @@ typedef struct {
8888
char *recv_worker_addrs;
8989
int *recv_worker_displs;
9090
size_t comm_size;
91-
opal_atomic_size_t num_incomplete_req_ops;
91+
opal_atomic_int64_t num_incomplete_req_ops;
9292
} opal_common_ucx_ctx_t;
9393

9494
/* Worker Pool memory (wpmem) is an object that represents a remotely accessible

0 commit comments

Comments
 (0)