Skip to content

Commit e77e31b

Browse files
authored
Merge pull request #5378 from hoopoepg/topic/unify-ucx-logging
MCA/COMMON/UCX: unified logging across all UCX modules
2 parents 3e98876 + 2406701 commit e77e31b

File tree

15 files changed

+164
-185
lines changed

15 files changed

+164
-185
lines changed

ompi/mca/osc/ucx/osc_ucx.h

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -14,6 +14,11 @@
1414

1515
#include "ompi/group/group.h"
1616
#include "ompi/communicator/communicator.h"
17+
#include "opal/mca/common/ucx/common_ucx.h"
18+
19+
#define OSC_UCX_ASSERT MCA_COMMON_UCX_ASSERT
20+
#define OSC_UCX_ERROR MCA_COMMON_UCX_ERROR
21+
#define OSC_UCX_VERBOSE MCA_COMMON_UCX_VERBOSE
1722

1823
#define OMPI_OSC_UCX_POST_PEER_MAX 32
1924
#define OMPI_OSC_UCX_ATTACH_MAX 32

ompi/mca/osc/ucx/osc_ucx_active_target.c

Lines changed: 1 addition & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -190,9 +190,7 @@ int ompi_osc_ucx_complete(struct ompi_win_t *win) {
190190
status = ucp_atomic_post(ep, UCP_ATOMIC_POST_OP_ADD, 1,
191191
sizeof(uint64_t), remote_addr, rkey);
192192
if (status != UCS_OK) {
193-
opal_output_verbose(1, ompi_osc_base_framework.framework_output,
194-
"%s:%d: ucp_atomic_post failed: %d\n",
195-
__FILE__, __LINE__, status);
193+
OSC_UCX_VERBOSE(1, "ucp_atomic_post failed: %d", status);
196194
}
197195

198196
opal_common_ucx_ep_flush(ep, mca_osc_ucx_component.ucp_worker);

ompi/mca/osc/ucx/osc_ucx_comm.c

Lines changed: 13 additions & 39 deletions
Original file line number | Diff line number | Diff line change
@@ -165,18 +165,14 @@ static inline int ddt_put_get(ompi_osc_ucx_module_t *module,
165165
status = ucp_put_nbi(ep, origin_ucx_iov[origin_ucx_iov_idx].addr, curr_len,
166166
remote_addr + (uint64_t)(target_ucx_iov[target_ucx_iov_idx].addr), rkey);
167167
if (status != UCS_OK && status != UCS_INPROGRESS) {
168-
opal_output_verbose(1, ompi_osc_base_framework.framework_output,
169-
"%s:%d: ucp_put_nbi failed: %d\n",
170-
__FILE__, __LINE__, status);
168+
OSC_UCX_VERBOSE(1, "ucp_put_nbi failed: %d", status);
171169
return OMPI_ERROR;
172170
}
173171
} else {
174172
status = ucp_get_nbi(ep, origin_ucx_iov[origin_ucx_iov_idx].addr, curr_len,
175173
remote_addr + (uint64_t)(target_ucx_iov[target_ucx_iov_idx].addr), rkey);
176174
if (status != UCS_OK && status != UCS_INPROGRESS) {
177-
opal_output_verbose(1, ompi_osc_base_framework.framework_output,
178-
"%s:%d: ucp_get_nbi failed: %d\n",
179-
__FILE__, __LINE__, status);
175+
OSC_UCX_VERBOSE(1, "ucp_get_nbi failed: %d",status);
180176
return OMPI_ERROR;
181177
}
182178
}
@@ -210,19 +206,15 @@ static inline int ddt_put_get(ompi_osc_ucx_module_t *module,
210206
origin_ucx_iov[origin_ucx_iov_idx].len,
211207
remote_addr + target_lb + prev_len, rkey);
212208
if (status != UCS_OK && status != UCS_INPROGRESS) {
213-
opal_output_verbose(1, ompi_osc_base_framework.framework_output,
214-
"%s:%d: ucp_put_nbi failed: %d\n",
215-
__FILE__, __LINE__, status);
209+
OSC_UCX_VERBOSE(1, "ucp_put_nbi failed: %d", status);
216210
return OMPI_ERROR;
217211
}
218212
} else {
219213
status = ucp_get_nbi(ep, origin_ucx_iov[origin_ucx_iov_idx].addr,
220214
origin_ucx_iov[origin_ucx_iov_idx].len,
221215
remote_addr + target_lb + prev_len, rkey);
222216
if (status != UCS_OK && status != UCS_INPROGRESS) {
223-
opal_output_verbose(1, ompi_osc_base_framework.framework_output,
224-
"%s:%d: ucp_get_nbi failed: %d\n",
225-
__FILE__, __LINE__, status);
217+
OSC_UCX_VERBOSE(1, "ucp_get_nbi failed: %d", status);
226218
return OMPI_ERROR;
227219
}
228220
}
@@ -243,19 +235,15 @@ static inline int ddt_put_get(ompi_osc_ucx_module_t *module,
243235
target_ucx_iov[target_ucx_iov_idx].len,
244236
remote_addr + (uint64_t)(target_ucx_iov[target_ucx_iov_idx].addr), rkey);
245237
if (status != UCS_OK && status != UCS_INPROGRESS) {
246-
opal_output_verbose(1, ompi_osc_base_framework.framework_output,
247-
"%s:%d: ucp_put_nbi failed: %d\n",
248-
__FILE__, __LINE__, status);
238+
OSC_UCX_VERBOSE(1, "ucp_put_nbi failed: %d", status);
249239
return OMPI_ERROR;
250240
}
251241
} else {
252242
status = ucp_get_nbi(ep, (void *)((intptr_t)origin_addr + origin_lb + prev_len),
253243
target_ucx_iov[target_ucx_iov_idx].len,
254244
remote_addr + (uint64_t)(target_ucx_iov[target_ucx_iov_idx].addr), rkey);
255245
if (status != UCS_OK && status != UCS_INPROGRESS) {
256-
opal_output_verbose(1, ompi_osc_base_framework.framework_output,
257-
"%s:%d: ucp_get_nbi failed: %d\n",
258-
__FILE__, __LINE__, status);
246+
OSC_UCX_VERBOSE(1, "ucp_get_nbi failed: %d", status);
259247
return OMPI_ERROR;
260248
}
261249
}
@@ -292,9 +280,7 @@ static inline int start_atomicity(ompi_osc_ucx_module_t *module, ucp_ep_h ep, in
292280
remote_addr, rkey,
293281
mca_osc_ucx_component.ucp_worker);
294282
if (status != UCS_OK) {
295-
opal_output_verbose(1, ompi_osc_base_framework.framework_output,
296-
"%s:%d: ucp_atomic_cswap64 failed: %d\n",
297-
__FILE__, __LINE__, status);
283+
OSC_UCX_VERBOSE(1, "ucp_atomic_cswap64 failed: %d", status);
298284
return OMPI_ERROR;
299285
}
300286
}
@@ -339,9 +325,7 @@ static inline int get_dynamic_win_info(uint64_t remote_addr, ompi_osc_ucx_module
339325

340326
status = ucp_get_nbi(ep, (void *)temp_buf, len, remote_state_addr, state_rkey);
341327
if (status != UCS_OK && status != UCS_INPROGRESS) {
342-
opal_output_verbose(1, ompi_osc_base_framework.framework_output,
343-
"%s:%d: ucp_get_nbi failed: %d\n",
344-
__FILE__, __LINE__, status);
328+
OSC_UCX_VERBOSE(1, "ucp_get_nbi failed: %d", status);
345329
return OMPI_ERROR;
346330
}
347331

@@ -361,9 +345,7 @@ static inline int get_dynamic_win_info(uint64_t remote_addr, ompi_osc_ucx_module
361345
status = ucp_ep_rkey_unpack(ep, temp_dynamic_wins[contain].rkey_buffer,
362346
&((module->win_info_array[target]).rkey));
363347
if (status != UCS_OK) {
364-
opal_output_verbose(1, ompi_osc_base_framework.framework_output,
365-
"%s:%d: ucp_ep_rkey_unpack failed: %d\n",
366-
__FILE__, __LINE__, status);
348+
OSC_UCX_VERBOSE(1, "ucp_ep_rkey_unpack failed: %d", status);
367349
return OMPI_ERROR;
368350
}
369351

@@ -416,9 +398,7 @@ int ompi_osc_ucx_put(const void *origin_addr, int origin_count, struct ompi_data
416398
status = ucp_put_nbi(ep, (void *)((intptr_t)origin_addr + origin_lb), origin_len,
417399
remote_addr + target_lb, rkey);
418400
if (status != UCS_OK && status != UCS_INPROGRESS) {
419-
opal_output_verbose(1, ompi_osc_base_framework.framework_output,
420-
"%s:%d: ucp_put_nbi failed: %d\n",
421-
__FILE__, __LINE__, status);
401+
OSC_UCX_VERBOSE(1, "ucp_put_nbi failed: %d", status);
422402
return OMPI_ERROR;
423403
}
424404
return incr_and_check_ops_num(module, target, ep);
@@ -472,9 +452,7 @@ int ompi_osc_ucx_get(void *origin_addr, int origin_count,
472452
status = ucp_get_nbi(ep, (void *)((intptr_t)origin_addr + origin_lb), origin_len,
473453
remote_addr + target_lb, rkey);
474454
if (status != UCS_OK && status != UCS_INPROGRESS) {
475-
opal_output_verbose(1, ompi_osc_base_framework.framework_output,
476-
"%s:%d: ucp_get_nbi failed: %d\n",
477-
__FILE__, __LINE__, status);
455+
OSC_UCX_VERBOSE(1, "ucp_get_nbi failed: %d", status);
478456
return OMPI_ERROR;
479457
}
480458

@@ -895,9 +873,7 @@ int ompi_osc_ucx_rput(const void *origin_addr, int origin_count,
895873

896874
status = ucp_worker_fence(mca_osc_ucx_component.ucp_worker);
897875
if (status != UCS_OK) {
898-
opal_output_verbose(1, ompi_osc_base_framework.framework_output,
899-
"%s:%d: ucp_worker_fence failed: %d\n",
900-
__FILE__, __LINE__, status);
876+
OSC_UCX_VERBOSE(1, "ucp_worker_fence failed: %d", status);
901877
return OMPI_ERROR;
902878
}
903879

@@ -956,9 +932,7 @@ int ompi_osc_ucx_rget(void *origin_addr, int origin_count,
956932

957933
status = ucp_worker_fence(mca_osc_ucx_component.ucp_worker);
958934
if (status != UCS_OK) {
959-
opal_output_verbose(1, ompi_osc_base_framework.framework_output,
960-
"%s:%d: ucp_worker_fence failed: %d\n",
961-
__FILE__, __LINE__, status);
935+
OSC_UCX_VERBOSE(1, "ucp_worker_fence failed: %d", status);
962936
return OMPI_ERROR;
963937
}
964938

ompi/mca/osc/ucx/osc_ucx_component.c

Lines changed: 17 additions & 48 deletions
Original file line number | Diff line number | Diff line change
@@ -125,9 +125,7 @@ static int component_init(bool enable_progress_threads, bool enable_mpi_threads)
125125

126126
status = ucp_config_read("MPI", NULL, &config);
127127
if (UCS_OK != status) {
128-
opal_output_verbose(1, ompi_osc_base_framework.framework_output,
129-
"%s:%d: ucp_config_read failed: %d\n",
130-
__FILE__, __LINE__, status);
128+
OSC_UCX_VERBOSE(1, "ucp_config_read failed: %d", status);
131129
return OMPI_ERROR;
132130
}
133131

@@ -139,9 +137,7 @@ static int component_init(bool enable_progress_threads, bool enable_mpi_threads)
139137
OBJ_CLASS(ompi_osc_ucx_request_t),
140138
0, 0, 8, 0, 8, NULL, 0, NULL, NULL, NULL);
141139
if (OMPI_SUCCESS != ret) {
142-
opal_output_verbose(1, ompi_osc_base_framework.framework_output,
143-
"%s:%d: opal_free_list_init failed: %d\n",
144-
__FILE__, __LINE__, ret);
140+
OSC_UCX_VERBOSE(1, "opal_free_list_init failed: %d", ret);
145141
goto error;
146142
}
147143

@@ -164,9 +160,7 @@ static int component_init(bool enable_progress_threads, bool enable_mpi_threads)
164160
status = ucp_init(&context_params, config, &mca_osc_ucx_component.ucp_context);
165161
ucp_config_release(config);
166162
if (UCS_OK != status) {
167-
opal_output_verbose(1, ompi_osc_base_framework.framework_output,
168-
"%s:%d: ucp_init failed: %d\n",
169-
__FILE__, __LINE__, status);
163+
OSC_UCX_VERBOSE(1, "ucp_init failed: %d", status);
170164
ret = OMPI_ERROR;
171165
goto error;
172166
}
@@ -196,6 +190,7 @@ static int component_finalize(void) {
196190
OBJ_DESTRUCT(&mca_osc_ucx_component.requests);
197191
opal_progress_unregister(progress_callback);
198192
ucp_cleanup(mca_osc_ucx_component.ucp_context);
193+
opal_common_ucx_mca_deregister();
199194
return OMPI_SUCCESS;
200195
}
201196

@@ -265,19 +260,15 @@ static inline int mem_map(void **base, size_t size, ucp_mem_h *memh_ptr,
265260

266261
status = ucp_mem_map(mca_osc_ucx_component.ucp_context, &mem_params, memh_ptr);
267262
if (status != UCS_OK) {
268-
opal_output_verbose(1, ompi_osc_base_framework.framework_output,
269-
"%s:%d: ucp_mem_map failed: %d\n",
270-
__FILE__, __LINE__, status);
263+
OSC_UCX_VERBOSE(1, "ucp_mem_map failed: %d", status);
271264
ret = OMPI_ERROR;
272265
goto error;
273266
}
274267

275268
mem_attrs.field_mask = UCP_MEM_ATTR_FIELD_ADDRESS | UCP_MEM_ATTR_FIELD_LENGTH;
276269
status = ucp_mem_query((*memh_ptr), &mem_attrs);
277270
if (status != UCS_OK) {
278-
opal_output_verbose(1, ompi_osc_base_framework.framework_output,
279-
"%s:%d: ucp_mem_query failed: %d\n",
280-
__FILE__, __LINE__, status);
271+
OSC_UCX_VERBOSE(1, "ucp_mem_query failed: %d", status);
281272
ret = OMPI_ERROR;
282273
goto error;
283274
}
@@ -336,38 +327,30 @@ static int component_select(struct ompi_win_t *win, void **base, size_t size, in
336327
status = ucp_worker_create(mca_osc_ucx_component.ucp_context, &worker_params,
337328
&(mca_osc_ucx_component.ucp_worker));
338329
if (UCS_OK != status) {
339-
opal_output_verbose(1, ompi_osc_base_framework.framework_output,
340-
"%s:%d: ucp_worker_create failed: %d\n",
341-
__FILE__, __LINE__, status);
330+
OSC_UCX_VERBOSE(1, "ucp_worker_create failed: %d", status);
342331
ret = OMPI_ERROR;
343332
goto error_nomem;
344333
}
345334

346335
ret = opal_progress_register(progress_callback);
347336
progress_registered = true;
348337
if (OMPI_SUCCESS != ret) {
349-
opal_output_verbose(1, ompi_osc_base_framework.framework_output,
350-
"%s:%d: opal_progress_register failed: %d\n",
351-
__FILE__, __LINE__, ret);
338+
OSC_UCX_VERBOSE(1, "opal_progress_register failed: %d", ret);
352339
goto error;
353340
}
354341

355342
/* query UCP worker attributes */
356343
worker_attr.field_mask = UCP_WORKER_ATTR_FIELD_THREAD_MODE;
357344
status = ucp_worker_query(mca_osc_ucx_component.ucp_worker, &worker_attr);
358345
if (UCS_OK != status) {
359-
opal_output_verbose(1, ompi_osc_base_framework.framework_output,
360-
"%s:%d: ucp_worker_query failed: %d\n",
361-
__FILE__, __LINE__, status);
346+
OSC_UCX_VERBOSE(1, "ucp_worker_query failed: %d", status);
362347
ret = OMPI_ERROR;
363348
goto error_nomem;
364349
}
365350

366351
if (mca_osc_ucx_component.enable_mpi_threads == true &&
367352
worker_attr.thread_mode != UCS_THREAD_MODE_MULTI) {
368-
opal_output_verbose(1, ompi_osc_base_framework.framework_output,
369-
"%s:%d: ucx does not support multithreading\n",
370-
__FILE__, __LINE__);
353+
OSC_UCX_VERBOSE(1, "ucx does not support multithreading");
371354
ret = OMPI_ERROR;
372355
goto error_nomem;
373356
}
@@ -450,9 +433,7 @@ static int component_select(struct ompi_win_t *win, void **base, size_t size, in
450433
status = ucp_worker_get_address(mca_osc_ucx_component.ucp_worker,
451434
&my_addr, &my_addr_len);
452435
if (status != UCS_OK) {
453-
opal_output_verbose(1, ompi_osc_base_framework.framework_output,
454-
"%s:%d: ucp_worker_get_address failed: %d\n",
455-
__FILE__, __LINE__, status);
436+
OSC_UCX_VERBOSE(1, "ucp_worker_get_address failed: %d", status);
456437
ret = OMPI_ERROR;
457438
goto error;
458439
}
@@ -472,9 +453,7 @@ static int component_select(struct ompi_win_t *win, void **base, size_t size, in
472453
ep_params.address = (ucp_address_t *)&(recv_buf[disps[i]]);
473454
status = ucp_ep_create(mca_osc_ucx_component.ucp_worker, &ep_params, &ep);
474455
if (status != UCS_OK) {
475-
opal_output_verbose(1, ompi_osc_base_framework.framework_output,
476-
"%s:%d: ucp_ep_create failed: %d\n",
477-
__FILE__, __LINE__, status);
456+
OSC_UCX_VERBOSE(1, "ucp_ep_create failed: %d", status);
478457
ret = OMPI_ERROR;
479458
goto error;
480459
}
@@ -519,9 +498,7 @@ static int component_select(struct ompi_win_t *win, void **base, size_t size, in
519498
status = ucp_rkey_pack(mca_osc_ucx_component.ucp_context, module->memh,
520499
&rkey_buffer, &rkey_buffer_size);
521500
if (status != UCS_OK) {
522-
opal_output_verbose(1, ompi_osc_base_framework.framework_output,
523-
"%s:%d: ucp_rkey_pack failed: %d\n",
524-
__FILE__, __LINE__, status);
501+
OSC_UCX_VERBOSE(1, "ucp_rkey_pack failed: %d", status);
525502
ret = OMPI_ERROR;
526503
goto error;
527504
}
@@ -532,9 +509,7 @@ static int component_select(struct ompi_win_t *win, void **base, size_t size, in
532509
status = ucp_rkey_pack(mca_osc_ucx_component.ucp_context, module->state_memh,
533510
&state_rkey_buffer, &state_rkey_buffer_size);
534511
if (status != UCS_OK) {
535-
opal_output_verbose(1, ompi_osc_base_framework.framework_output,
536-
"%s:%d: ucp_rkey_pack failed: %d\n",
537-
__FILE__, __LINE__, status);
512+
OSC_UCX_VERBOSE(1, "ucp_rkey_pack failed: %d", status);
538513
ret = OMPI_ERROR;
539514
goto error;
540515
}
@@ -581,9 +556,7 @@ static int component_select(struct ompi_win_t *win, void **base, size_t size, in
581556
status = ucp_ep_rkey_unpack(ep, &(recv_buf[disps[i] + 2 * sizeof(uint64_t)]),
582557
&((module->win_info_array[i]).rkey));
583558
if (status != UCS_OK) {
584-
opal_output_verbose(1, ompi_osc_base_framework.framework_output,
585-
"%s:%d: ucp_ep_rkey_unpack failed: %d\n",
586-
__FILE__, __LINE__, status);
559+
OSC_UCX_VERBOSE(1, "ucp_ep_rkey_unpack failed: %d", status);
587560
ret = OMPI_ERROR;
588561
goto error;
589562
}
@@ -593,9 +566,7 @@ static int component_select(struct ompi_win_t *win, void **base, size_t size, in
593566
status = ucp_ep_rkey_unpack(ep, &(recv_buf[disps[i] + 2 * sizeof(uint64_t) + rkey_sizes[i]]),
594567
&((module->state_info_array[i]).rkey));
595568
if (status != UCS_OK) {
596-
opal_output_verbose(1, ompi_osc_base_framework.framework_output,
597-
"%s:%d: ucp_ep_rkey_unpack failed: %d\n",
598-
__FILE__, __LINE__, status);
569+
OSC_UCX_VERBOSE(1, "ucp_ep_rkey_unpack failed: %d", status);
599570
ret = OMPI_ERROR;
600571
goto error;
601572
}
@@ -750,9 +721,7 @@ int ompi_osc_ucx_win_attach(struct ompi_win_t *win, void *base, size_t len) {
750721
module->local_dynamic_win_info[insert_index].memh,
751722
&rkey_buffer, (size_t *)&rkey_buffer_size);
752723
if (status != UCS_OK) {
753-
opal_output_verbose(1, ompi_osc_base_framework.framework_output,
754-
"%s:%d: ucp_rkey_pack failed: %d\n",
755-
__FILE__, __LINE__, status);
724+
OSC_UCX_VERBOSE(1, "ucp_rkey_pack failed: %d", status);
756725
return OMPI_ERROR;
757726
}
758727

0 commit comments

Comments
 (0)