Skip to content

Commit 43f16d8

Browse files
committed
opal/common/ucx: Remove common_ucx_int.h
Move the contents of common_ucx_int.h back into common_ucx.h and include common_ucx_wpool.h explicitly. Signed-off-by: Artem Polyakov <artpol84@gmail.com>
1 parent c6de099 commit 43f16d8

File tree

7 files changed

+208
-216
lines changed

7 files changed

+208
-216
lines changed

ompi/mca/osc/ucx/osc_ucx.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
#include "ompi/group/group.h"
1616
#include "ompi/communicator/communicator.h"
1717
#include "opal/mca/common/ucx/common_ucx.h"
18+
#include "opal/mca/common/ucx/common_ucx_wpool.h"
1819

1920
#define OSC_UCX_ASSERT MCA_COMMON_UCX_ASSERT
2021
#define OSC_UCX_ERROR MCA_COMMON_UCX_ERROR

opal/mca/common/ucx/Makefile.am

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,6 @@
1414

1515
headers = \
1616
common_ucx.h \
17-
common_ucx_int.h \
1817
common_ucx_wpool.h \
1918
common_ucx_wpool_int.h
2019

opal/mca/common/ucx/common_ucx.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99

1010
#include "opal_config.h"
1111

12-
#include "common_ucx_int.h"
12+
#include "common_ucx.h"
1313
#include "opal/mca/base/mca_base_var.h"
1414
#include "opal/mca/base/mca_base_framework.h"
1515
#include "opal/mca/pmix/pmix.h"

opal/mca/common/ucx/common_ucx.h

Lines changed: 204 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,209 @@
1313
#ifndef _COMMON_UCX_H_
1414
#define _COMMON_UCX_H_
1515

16-
#include "common_ucx_int.h"
17-
#include "common_ucx_wpool.h"
16+
#include "opal_config.h"
17+
18+
#include <stdint.h>
19+
20+
#include <ucp/api/ucp.h>
21+
22+
#include "opal/mca/mca.h"
23+
#include "opal/util/output.h"
24+
#include "opal/runtime/opal_progress.h"
25+
#include "opal/include/opal/constants.h"
26+
#include "opal/class/opal_list.h"
27+
28+
BEGIN_C_DECLS
29+
30+
/*
 * Debug configuration of the common UCX component.
 *
 * MCA_COMMON_UCX_ENABLE_DEBUG mirrors OPAL_ENABLE_DEBUG and selects:
 *  - MCA_COMMON_UCX_MAX_VERBOSE: compile-time cap on the verbosity level
 *    (100 in debug builds, 2 otherwise);
 *  - MCA_COMMON_UCX_ASSERT(_x): assert(_x) in debug builds, a no-op otherwise.
 */
#define MCA_COMMON_UCX_ENABLE_DEBUG OPAL_ENABLE_DEBUG
#if MCA_COMMON_UCX_ENABLE_DEBUG
#  define MCA_COMMON_UCX_MAX_VERBOSE 100
#  define MCA_COMMON_UCX_ASSERT(_x)  assert(_x)
#else
#  define MCA_COMMON_UCX_MAX_VERBOSE 2
/* Expand to a genuine void expression (as <assert.h> does under NDEBUG)
 * rather than to nothing, so the macro is usable in every context where
 * assert() is; the argument is not evaluated. */
#  define MCA_COMMON_UCX_ASSERT(_x)  ((void) 0)
#endif
38+
39+
/* Operation-count thresholds. NOTE(review): nothing in this header reads
 * them; presumably users compare the number of outstanding operations (per
 * target / in total) against these limits -- confirm against the callers. */
#define MCA_COMMON_UCX_PER_TARGET_OPS_THRESHOLD 100000
40+
#define MCA_COMMON_UCX_GLOBAL_OPS_THRESHOLD 1000000
41+
42+
/*
 * Two-level stringification: MCA_COMMON_UCX_QUOTE(_x) macro-expands its
 * argument first (so __LINE__ becomes the actual line number) and only then
 * hands it to the helper, which turns it into a string literal.
 */
#define _MCA_COMMON_UCX_QUOTE(_x) #_x
#define MCA_COMMON_UCX_QUOTE(_x)  _MCA_COMMON_UCX_QUOTE(_x)
46+
47+
/*
 * Emit an error message on the component's output stream at verbosity
 * level 0. The text is prefixed with "<file>:<line> Error: "; the prefix is
 * glued to the format by string-literal concatenation, so the first variadic
 * argument has to be a string literal.
 */
#define MCA_COMMON_UCX_ERROR(...)                                           \
    opal_output_verbose(0, opal_common_ucx.output,                          \
                        __FILE__ ":" MCA_COMMON_UCX_QUOTE(__LINE__)         \
                        " Error: " __VA_ARGS__)
51+
52+
/*
 * Emit a verbose message prefixed with "<file>:<line> " when _level passes
 * both the compile-time cap (MCA_COMMON_UCX_MAX_VERBOSE) and the run-time
 * setting (opal_common_ucx.verbose). The prefix is joined to the format by
 * string-literal concatenation, so the first variadic argument must be a
 * string literal.
 *
 * The body is wrapped in do { } while (0) so that the macro behaves like a
 * single statement: it must be followed by a semicolon and can safely sit in
 * an unbraced if/else without capturing or breaking the caller's `else`.
 */
#define MCA_COMMON_UCX_VERBOSE(_level, ... )                                     \
    do {                                                                         \
        if (((_level) <= MCA_COMMON_UCX_MAX_VERBOSE) &&                          \
            ((_level) <= opal_common_ucx.verbose)) {                             \
            opal_output_verbose(_level, opal_common_ucx.output,                  \
                                __FILE__ ":" MCA_COMMON_UCX_QUOTE(__LINE__) " "  \
                                __VA_ARGS__);                                    \
        }                                                                        \
    } while (0)
59+
60+
/* Endless for-loop header that drives progress between passes of the loop
 * body supplied by the caller: after each pass it calls
 * ucp_worker_progress(_worker), except on every
 * opal_common_ucx.progress_iterations-th pass, where it calls
 * opal_progress() instead. There is no exit condition, so the body has to
 * leave the loop itself (return/break/goto).
 * NOTE(review): progress_iterations is used as a modulus and must therefore
 * be non-zero -- confirm it is validated where it is set. */
61+
/* Relies on the C99 declaration of the pass counter (`iter`) inside the for statement. */
62+
#define MCA_COMMON_UCX_PROGRESS_LOOP(_worker) \
63+
for (unsigned iter = 0;; (++iter % opal_common_ucx.progress_iterations) ? \
64+
(void)ucp_worker_progress(_worker) : opal_progress())
65+
66+
/*
 * Poll a non-blocking UCX request until it leaves UCS_INPROGRESS, driving
 * progress through MCA_COMMON_UCX_PROGRESS_LOOP(_worker) between polls.
 *
 * NOTE: this macro *returns from the enclosing function*: OPAL_SUCCESS when
 * the request finished with UCS_OK, OPAL_ERROR otherwise. Control never
 * falls out of it.
 *
 *   _request   - request handle to poll (evaluated repeatedly)
 *   _worker    - worker to progress while waiting
 *   _msg       - operation name for the failure message; when NULL the name
 *                of the enclosing function is used
 *   _completed - statement/expression executed once, as soon as the request
 *                is no longer in progress (e.g. releasing the request)
 */
#define MCA_COMMON_UCX_WAIT_LOOP(_request, _worker, _msg, _completed)          \
    do {                                                                       \
        ucs_status_t status;                                                   \
        /* call UCX progress */                                                \
        MCA_COMMON_UCX_PROGRESS_LOOP(_worker) {                                \
            status = opal_common_ucx_request_status(_request);                 \
            if (UCS_INPROGRESS != status) {                                    \
                _completed;                                                    \
                if (OPAL_LIKELY(UCS_OK == status)) {                           \
                    return OPAL_SUCCESS;                                       \
                } else {                                                       \
                    /* report the polled status: _request is a request     */  \
                    /* pointer here (possibly already released by          */  \
                    /* _completed), not an encoded status value            */  \
                    MCA_COMMON_UCX_VERBOSE(1, "%s failed: %d, %s",             \
                                           (_msg) ? (_msg) : __func__,         \
                                           (int)status,                        \
                                           ucs_status_string(status));         \
                    return OPAL_ERROR;                                         \
                }                                                              \
            }                                                                  \
        }                                                                      \
    } while (0)
86+
87+
/* State shared by the users of the common UCX component; a single instance
 * is exposed as the global `opal_common_ucx`. */
struct opal_common_ucx_module {
    int  output;               /* stream id handed to opal_output_verbose() by the logging macros */
    int  verbose;              /* run-time verbosity limit checked by MCA_COMMON_UCX_VERBOSE */
    int  progress_iterations;  /* period (in passes) of the opal_progress() call in
                                * MCA_COMMON_UCX_PROGRESS_LOOP */
    int  registered;           /* NOTE(review): looks like a registration count/flag --
                                * confirm in common_ucx.c */
    bool opal_mem_hooks;       /* NOTE(review): presumably enables OPAL memory hooks --
                                * confirm in common_ucx.c */
};

typedef struct opal_common_ucx_module opal_common_ucx_module_t;
94+
95+
typedef struct opal_common_ucx_del_proc {
96+
ucp_ep_h ep;
97+
size_t vpid;
98+
} opal_common_ucx_del_proc_t;
99+
100+
/* Global component state; read by the logging macros and the progress loop
 * defined in this header. Defined outside this header. */
extern opal_common_ucx_module_t opal_common_ucx;
101+
102+
/* NOTE(review): the functions below are implemented outside this header;
 * the remarks on them are inferred from names and uses here -- confirm
 * against common_ucx.c. */
/* Presumably registers the component (output stream etc.). */
OPAL_DECLSPEC void opal_common_ucx_mca_register(void);
103+
/* Presumably undoes opal_common_ucx_mca_register(). */
OPAL_DECLSPEC void opal_common_ucx_mca_deregister(void);
104+
/* Completion callback handed to the ucp_*_nb() calls issued by the inline
 * helpers in this header. */
OPAL_DECLSPEC void opal_common_ucx_empty_complete_cb(void *request, ucs_status_t status);
105+
/* Presumably a PMIx fence that keeps `worker` progressing while waiting. */
OPAL_DECLSPEC int opal_common_ucx_mca_pmix_fence(ucp_worker_h worker);
106+
/* Presumably disconnects the `count` endpoints listed in `procs`. */
OPAL_DECLSPEC int opal_common_ucx_del_procs(opal_common_ucx_del_proc_t *procs, size_t count,
107+
size_t my_rank, size_t max_disconnect, ucp_worker_h worker);
108+
/* Presumably registers the MCA variables of this module on behalf of `component`. */
OPAL_DECLSPEC void opal_common_ucx_mca_var_register(const mca_base_component_t *component);
109+
110+
static inline
111+
ucs_status_t opal_common_ucx_request_status(ucs_status_ptr_t request)
112+
{
113+
#if !HAVE_DECL_UCP_REQUEST_CHECK_STATUS
114+
ucp_tag_recv_info_t info;
115+
116+
return ucp_request_test(request, &info);
117+
#else
118+
return ucp_request_check_status(request);
119+
#endif
120+
}
121+
122+
static inline
123+
int opal_common_ucx_wait_request(ucs_status_ptr_t request, ucp_worker_h worker,
124+
const char *msg)
125+
{
126+
/* check for request completed or failed */
127+
if (OPAL_LIKELY(UCS_OK == request)) {
128+
return OPAL_SUCCESS;
129+
} else if (OPAL_UNLIKELY(UCS_PTR_IS_ERR(request))) {
130+
MCA_COMMON_UCX_VERBOSE(1, "%s failed: %d, %s", msg ? msg : __func__,
131+
UCS_PTR_STATUS(request),
132+
ucs_status_string(UCS_PTR_STATUS(request)));
133+
return OPAL_ERROR;
134+
}
135+
136+
MCA_COMMON_UCX_WAIT_LOOP(request, worker, msg, ucp_request_free(request));
137+
}
138+
139+
static inline
140+
int opal_common_ucx_ep_flush(ucp_ep_h ep, ucp_worker_h worker)
141+
{
142+
#if HAVE_DECL_UCP_EP_FLUSH_NB
143+
ucs_status_ptr_t request;
144+
145+
request = ucp_ep_flush_nb(ep, 0, opal_common_ucx_empty_complete_cb);
146+
return opal_common_ucx_wait_request(request, worker, "ucp_ep_flush_nb");
147+
#else
148+
ucs_status_t status;
149+
150+
status = ucp_ep_flush(ep);
151+
return (status == UCS_OK) ? OPAL_SUCCESS : OPAL_ERROR;
152+
#endif
153+
}
154+
155+
static inline
156+
int opal_common_ucx_worker_flush(ucp_worker_h worker)
157+
{
158+
#if HAVE_DECL_UCP_WORKER_FLUSH_NB
159+
ucs_status_ptr_t request;
160+
161+
request = ucp_worker_flush_nb(worker, 0, opal_common_ucx_empty_complete_cb);
162+
return opal_common_ucx_wait_request(request, worker, "ucp_worker_flush_nb");
163+
#else
164+
ucs_status_t status;
165+
166+
status = ucp_worker_flush(worker);
167+
return (status == UCS_OK) ? OPAL_SUCCESS : OPAL_ERROR;
168+
#endif
169+
}
170+
171+
static inline
172+
int opal_common_ucx_atomic_fetch(ucp_ep_h ep, ucp_atomic_fetch_op_t opcode,
173+
uint64_t value, void *result, size_t op_size,
174+
uint64_t remote_addr, ucp_rkey_h rkey,
175+
ucp_worker_h worker)
176+
{
177+
ucs_status_ptr_t request;
178+
179+
request = ucp_atomic_fetch_nb(ep, opcode, value, result, op_size,
180+
remote_addr, rkey, opal_common_ucx_empty_complete_cb);
181+
return opal_common_ucx_wait_request(request, worker, "ucp_atomic_fetch_nb");
182+
}
183+
184+
static inline
185+
ucs_status_ptr_t opal_common_ucx_atomic_fetch_nb(ucp_ep_h ep, ucp_atomic_fetch_op_t opcode,
186+
uint64_t value, void *result, size_t op_size,
187+
uint64_t remote_addr, ucp_rkey_h rkey,
188+
ucp_send_callback_t req_handler,
189+
ucp_worker_h worker)
190+
{
191+
return ucp_atomic_fetch_nb(ep, opcode, value, result, op_size,
192+
remote_addr, rkey, req_handler);
193+
}
194+
195+
static inline
196+
int opal_common_ucx_atomic_cswap(ucp_ep_h ep, uint64_t compare,
197+
uint64_t value, void *result, size_t op_size,
198+
uint64_t remote_addr, ucp_rkey_h rkey,
199+
ucp_worker_h worker)
200+
{
201+
uint64_t tmp = value;
202+
int ret;
203+
204+
ret = opal_common_ucx_atomic_fetch(ep, UCP_ATOMIC_FETCH_OP_CSWAP, compare, &tmp,
205+
op_size, remote_addr, rkey, worker);
206+
if (OPAL_LIKELY(OPAL_SUCCESS == ret)) {
207+
/* in case if op_size is constant (like sizeof(type)) then this condition
208+
* is evaluated in compile time */
209+
if (op_size == sizeof(uint64_t)) {
210+
*(uint64_t*)result = tmp;
211+
} else {
212+
assert(op_size == sizeof(uint32_t));
213+
*(uint32_t*)result = tmp;
214+
}
215+
}
216+
return ret;
217+
}
218+
219+
END_C_DECLS
18220

19221
#endif

0 commit comments

Comments
 (0)