Skip to content

Commit 26e244c

Browse files
committed
opal/accelerator: Remove function table and dlopen logic
Instead of dlopening cuda, add direct dependency on libcuda. This also means we can remove the dlopen dependency. Signed-off-by: William Zhang <wilzhang@amazon.com>
1 parent afd23c9 commit 26e244c

File tree

7 files changed

+83
-248
lines changed

7 files changed

+83
-248
lines changed

config/opal_check_cuda.m4

Lines changed: 28 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,22 @@ dnl
2727
dnl $HEADER$
2828
dnl
2929

30+
31+
# OPAL_CHECK_CUDA(prefix, [action-if-found], [action-if-not-found])
32+
# --------------------------------------------------------
33+
# Check if CUDA support can be found. Sets prefix_{CPPFLAGS,
34+
# LDFLAGS, LIBS} as needed and runs action-if-found if there is
35+
# support, otherwise executes action-if-not-found
36+
37+
#
38+
# Check for CUDA support
39+
#
3040
AC_DEFUN([OPAL_CHECK_CUDA],[
41+
OPAL_VAR_SCOPE_PUSH([cuda_save_CPPFLAGS cuda_save_LDFLAGS cuda_save_LIBS])
42+
43+
cuda_save_CPPFLAGS="$CPPFLAGS"
44+
cuda_save_LDFLAGS="$LDFLAGS"
45+
cuda_save_LIBS="$LIBS"
3146
#
3247
# Check to see if user wants CUDA support
3348
#
@@ -72,12 +87,15 @@ AS_IF([test "$with_cuda" = "no" || test "x$with_cuda" = "x"],
7287
opal_cuda_incdir="$with_cuda/include"
7388
AC_MSG_RESULT([found ($opal_cuda_incdir/cuda.h)])])])])])
7489
75-
dnl We cannot have CUDA support without dlopen support. HOWEVER, at
76-
dnl this point in configure, we can't know whether the DL framework
77-
dnl has been configured or not yet (it likely hasn't, since CUDA is a
78-
dnl common framework, and likely configured first). So we have to
79-
dnl defer this check until later (see the OPAL_CHECK_CUDA_AFTER_OPAL_DL m4
80-
dnl macro, below). :-(
90+
AS_IF([test "$opal_check_cuda_happy" = "yes"],
91+
[OAC_CHECK_PACKAGE([cuda],
92+
[$1],
93+
[cuda.h],
94+
[cuda],
95+
[cuMemFree],
96+
[opal_check_cuda_happy="yes"],
97+
[opal_check_cuda_happy="no"])],
98+
[])
8199
82100
# We require CUDA IPC support which started in CUDA 4.1. Error
83101
# out if the support is not there.
@@ -144,22 +162,9 @@ AM_CONDITIONAL([OPAL_cuda_gdr_support], [test "x$CUDA_VERSION_60_OR_GREATER" = "
144162
AC_DEFINE_UNQUOTED([OPAL_CUDA_GDR_SUPPORT],$CUDA_VERSION_60_OR_GREATER,
145163
[Whether we have CUDA GDR support available])
146164
165+
CPPFLAGS=${cuda_save_CPPFLAGS}
166+
LDFLAGS=${cuda_save_LDFLAGS}
167+
LIBS=${cuda_save_LIBS}
168+
OPAL_VAR_SCOPE_POP
147169
])
148170

149-
dnl
150-
dnl CUDA support requires DL support (it dynamically opens the CUDA
151-
dnl library at run time). But we do not check for OPAL DL support
152-
dnl until after the initial OPAL_CHECK_CUDA is called. So put the
153-
dnl CUDA+DL check in a separate macro that can be called after the DL MCA
154-
dnl framework checks in the top-level configure.ac.
155-
dnl
156-
AC_DEFUN([OPAL_CHECK_CUDA_AFTER_OPAL_DL],[
157-
158-
# We cannot have CUDA support without OPAL DL support. Error out
159-
# if the user wants CUDA but we do not have OPAL DL support.
160-
AS_IF([test $OPAL_HAVE_DL_SUPPORT -eq 0 && \
161-
test "$opal_check_cuda_happy" = "yes"],
162-
[AC_MSG_WARN([--with-cuda was specified, but dlopen support is disabled.])
163-
AC_MSG_WARN([You must reconfigure Open MPI with dlopen ("dl") support.])
164-
AC_MSG_ERROR([Cannot continue.])])
165-
])

configure.ac

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -987,7 +987,14 @@ AC_CACHE_SAVE
987987

988988
opal_show_title "System-specific tests"
989989

990-
OPAL_CHECK_CUDA
990+
################
991+
# CUDA support #
992+
################
993+
# Note, we should remove this when opal/cuda is removed
994+
OPAL_CHECK_CUDA([opal_cuda],
995+
[opal_cuda_happy="yes"],
996+
[opal_cuda_happy="no"])
997+
991998
##################################
992999
OPAL_CHECK_OS_FLAVORS
9931000

@@ -1233,8 +1240,6 @@ AC_CACHE_SAVE
12331240
# be done better by having some kind of "run this check at the end of
12341241
# all other MCA checks" hook...?
12351242

1236-
OPAL_CHECK_CUDA_AFTER_OPAL_DL
1237-
12381243
OPAL_CHECK_ROCM_AFTER_OPAL_DL
12391244

12401245
##################################

opal/mca/accelerator/cuda/Makefile.am

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,10 +32,13 @@ endif
3232

3333
mcacomponentdir = $(opallibdir)
3434
mcacomponent_LTLIBRARIES = $(component_install)
35+
3536
mca_accelerator_cuda_la_SOURCES = $(sources)
3637
mca_accelerator_cuda_la_LDFLAGS = -module -avoid-version
37-
mca_accelerator_cuda_la_LIBADD = $(top_builddir)/opal/lib@OPAL_LIB_NAME@.la
38+
mca_accelerator_cuda_la_LIBADD = $(top_builddir)/opal/lib@OPAL_LIB_NAME@.la \
39+
$(accelerator_cuda_LIBS)
3840

3941
noinst_LTLIBRARIES = $(component_noinst)
4042
libmca_accelerator_cuda_la_SOURCES =$(sources)
4143
libmca_accelerator_cuda_la_LDFLAGS = -module -avoid-version
44+
libmca_accelerator_cuda_la_LIBADD = $(accelerator_cuda_LIBS)

opal/mca/accelerator/cuda/accelerator_cuda.c

Lines changed: 29 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -97,7 +97,7 @@ static int accelerator_cuda_check_addr(const void *addr, int *dev_id, uint64_t *
9797
CU_POINTER_ATTRIBUTE_IS_MANAGED};
9898
void *attrdata[] = {(void *) &mem_type, (void *) &mem_ctx, (void *) &is_managed};
9999

100-
result = opal_accelerator_cuda_func.cuPointerGetAttributes(3, attributes, attrdata, dbuf);
100+
result = cuPointerGetAttributes(3, attributes, attrdata, dbuf);
101101
OPAL_OUTPUT_VERBOSE((101, opal_accelerator_base_framework.framework_output,
102102
"dbuf=%p, mem_type=%d, mem_ctx=%p, is_managed=%d, result=%d", (void *) dbuf,
103103
(int) mem_type, (void *) mem_ctx, is_managed, result));
@@ -121,7 +121,7 @@ static int accelerator_cuda_check_addr(const void *addr, int *dev_id, uint64_t *
121121
/* Must be a device pointer */
122122
assert(CU_MEMORYTYPE_DEVICE == mem_type);
123123
#else /* OPAL_CUDA_GET_ATTRIBUTES */
124-
result = opal_accelerator_cuda_func.cuPointerGetAttribute(&mem_type, CU_POINTER_ATTRIBUTE_MEMORY_TYPE, dbuf);
124+
result = cuPointerGetAttribute(&mem_type, CU_POINTER_ATTRIBUTE_MEMORY_TYPE, dbuf);
125125
if (CUDA_SUCCESS != result) {
126126
/* If we cannot determine it is device pointer,
127127
* just assume it is not. */
@@ -142,11 +142,11 @@ static int accelerator_cuda_check_addr(const void *addr, int *dev_id, uint64_t *
142142
* GPU memory, but no context, get the context from the GPU memory
143143
* and set the current context to that. It is rare that we will not
144144
* have a context. */
145-
result = opal_accelerator_cuda_func.cuCtxGetCurrent(&ctx);
145+
result = cuCtxGetCurrent(&ctx);
146146
if (OPAL_UNLIKELY(NULL == ctx)) {
147147
if (CUDA_SUCCESS == result) {
148148
#if !OPAL_CUDA_GET_ATTRIBUTES
149-
result = opal_accelerator_cuda_func.cuPointerGetAttribute(&mem_ctx, CU_POINTER_ATTRIBUTE_CONTEXT, dbuf);
149+
result = cuPointerGetAttribute(&mem_ctx, CU_POINTER_ATTRIBUTE_CONTEXT, dbuf);
150150
if (OPAL_UNLIKELY(CUDA_SUCCESS != result)) {
151151
opal_output(0,
152152
"CUDA: error calling cuPointerGetAttribute: "
@@ -155,7 +155,7 @@ static int accelerator_cuda_check_addr(const void *addr, int *dev_id, uint64_t *
155155
return OPAL_ERROR;
156156
}
157157
#endif /* OPAL_CUDA_GET_ATTRIBUTES */
158-
result = opal_accelerator_cuda_func.cuCtxSetCurrent(mem_ctx);
158+
result = cuCtxSetCurrent(mem_ctx);
159159
if (OPAL_UNLIKELY(CUDA_SUCCESS != result)) {
160160
opal_output(0,
161161
"CUDA: error calling cuCtxSetCurrent: "
@@ -185,7 +185,7 @@ static int accelerator_cuda_check_addr(const void *addr, int *dev_id, uint64_t *
185185
if (OPAL_LIKELY(((CUDA_VERSION > 7000) ? 0 : 1))) {
186186
CUdeviceptr pbase;
187187
size_t psize;
188-
result = opal_accelerator_cuda_func.cuMemGetAddressRange(&pbase, &psize, dbuf);
188+
result = cuMemGetAddressRange(&pbase, &psize, dbuf);
189189
if (CUDA_SUCCESS != result) {
190190
opal_output_verbose(5, opal_accelerator_base_framework.framework_output,
191191
"CUDA: cuMemGetAddressRange failed on this pointer: result=%d, buf=%p "
@@ -214,7 +214,7 @@ static int accelerator_cuda_create_stream(int dev_id, opal_accelerator_stream_t
214214
return OPAL_ERR_OUT_OF_RESOURCE;
215215
}
216216

217-
result = opal_accelerator_cuda_func.cuStreamCreate((*stream)->stream, 0);
217+
result = cuStreamCreate((*stream)->stream, 0);
218218
if (OPAL_UNLIKELY(result != CUDA_SUCCESS)) {
219219
opal_show_help("help-accelerator-cuda.txt", "cuStreamCreate failed", true,
220220
OPAL_PROC_MY_HOSTNAME, result);
@@ -230,7 +230,7 @@ static void opal_accelerator_cuda_stream_destruct(opal_accelerator_cuda_stream_t
230230
CUresult result;
231231

232232
if (NULL != stream->base.stream) {
233-
result = opal_accelerator_cuda_func.cuStreamDestroy(*(CUstream *)stream->base.stream);
233+
result = cuStreamDestroy(*(CUstream *)stream->base.stream);
234234
if (OPAL_UNLIKELY(CUDA_SUCCESS != result)) {
235235
opal_show_help("help-accelerator-cuda.txt", "cuStreamDestroy failed", true,
236236
result);
@@ -259,7 +259,7 @@ static int accelerator_cuda_create_event(int dev_id, opal_accelerator_event_t **
259259
OBJ_RELEASE(*event);
260260
return OPAL_ERR_OUT_OF_RESOURCE;
261261
}
262-
result = opal_accelerator_cuda_func.cuEventCreate((*event)->event, CU_EVENT_DISABLE_TIMING);
262+
result = cuEventCreate((*event)->event, CU_EVENT_DISABLE_TIMING);
263263
if (OPAL_UNLIKELY(CUDA_SUCCESS != result)) {
264264
opal_show_help("help-accelerator-cuda.txt", "cuEventCreate failed", true,
265265
OPAL_PROC_MY_HOSTNAME, result);
@@ -274,7 +274,7 @@ static void opal_accelerator_cuda_event_destruct(opal_accelerator_cuda_event_t *
274274
{
275275
CUresult result;
276276
if (NULL != event->base.event) {
277-
result = opal_accelerator_cuda_func.cuEventDestroy(*(CUevent *)event->base.event);
277+
result = cuEventDestroy(*(CUevent *)event->base.event);
278278
if (OPAL_UNLIKELY(CUDA_SUCCESS != result)) {
279279
opal_show_help("help-accelerator-cuda.txt", "cuEventDestroy failed", true,
280280
result);
@@ -297,7 +297,7 @@ static int accelerator_cuda_record_event(int dev_id, opal_accelerator_event_t *e
297297
return OPAL_ERR_BAD_PARAM;
298298
}
299299

300-
result = opal_accelerator_cuda_func.cuEventRecord(*(CUevent *)event->event, *(CUstream *)stream->stream);
300+
result = cuEventRecord(*(CUevent *)event->event, *(CUstream *)stream->stream);
301301
if (OPAL_UNLIKELY(CUDA_SUCCESS != result)) {
302302
opal_show_help("help-accelerator-cuda.txt", "cuEventRecord failed", true,
303303
OPAL_PROC_MY_HOSTNAME, result);
@@ -314,7 +314,7 @@ static int accelerator_cuda_query_event(int dev_id, opal_accelerator_event_t *ev
314314
return OPAL_ERR_BAD_PARAM;
315315
}
316316

317-
result = opal_accelerator_cuda_func.cuEventQuery(*(CUevent *)event->event);
317+
result = cuEventQuery(*(CUevent *)event->event);
318318
switch (result) {
319319
case CUDA_SUCCESS:
320320
{
@@ -344,7 +344,7 @@ static int accelerator_cuda_memcpy_async(int dest_dev_id, int src_dev_id, void *
344344
return OPAL_ERR_BAD_PARAM;
345345
}
346346

347-
result = opal_accelerator_cuda_func.cuMemcpyAsync((CUdeviceptr) dest, (CUdeviceptr) src, size, *(CUstream *)stream->stream);
347+
result = cuMemcpyAsync((CUdeviceptr) dest, (CUdeviceptr) src, size, *(CUstream *)stream->stream);
348348
if (OPAL_UNLIKELY(CUDA_SUCCESS != result)) {
349349
opal_show_help("help-accelerator-cuda.txt", "cuMemcpyAsync failed", true, dest, src,
350350
size, result);
@@ -370,13 +370,13 @@ static int accelerator_cuda_memcpy(int dest_dev_id, int src_dev_id, void *dest,
370370
* Additionally, cuMemcpy is not necessarily always synchronous. See:
371371
* https://docs.nvidia.com/cuda/cuda-driver-api/api-sync-behavior.html
372372
* TODO: Add optimizations for type field */
373-
result = opal_accelerator_cuda_func.cuMemcpyAsync((CUdeviceptr) dest, (CUdeviceptr) src, size, opal_accelerator_cuda_memcpy_stream);
373+
result = cuMemcpyAsync((CUdeviceptr) dest, (CUdeviceptr) src, size, opal_accelerator_cuda_memcpy_stream);
374374
if (OPAL_UNLIKELY(CUDA_SUCCESS != result)) {
375375
opal_show_help("help-accelerator-cuda.txt", "cuMemcpyAsync failed", true, dest, src,
376376
size, result);
377377
return OPAL_ERROR;
378378
}
379-
result = opal_accelerator_cuda_func.cuStreamSynchronize(opal_accelerator_cuda_memcpy_stream);
379+
result = cuStreamSynchronize(opal_accelerator_cuda_memcpy_stream);
380380
if (OPAL_UNLIKELY(CUDA_SUCCESS != result)) {
381381
opal_show_help("help-accelerator-cuda.txt", "cuStreamSynchronize failed", true,
382382
OPAL_PROC_MY_HOSTNAME, result);
@@ -395,29 +395,29 @@ static int accelerator_cuda_memmove(int dest_dev_id, int src_dev_id, void *dest,
395395
return OPAL_ERR_BAD_PARAM;
396396
}
397397

398-
result = opal_accelerator_cuda_func.cuMemAlloc(&tmp, size);
398+
result = cuMemAlloc(&tmp, size);
399399
if (OPAL_UNLIKELY(CUDA_SUCCESS != result)) {
400400
return OPAL_ERROR;
401401
}
402-
result = opal_accelerator_cuda_func.cuMemcpyAsync(tmp, (CUdeviceptr) src, size, opal_accelerator_cuda_memcpy_stream);
402+
result = cuMemcpyAsync(tmp, (CUdeviceptr) src, size, opal_accelerator_cuda_memcpy_stream);
403403
if (OPAL_UNLIKELY(CUDA_SUCCESS != result)) {
404404
opal_show_help("help-accelerator-cuda.txt", "cuMemcpyAsync failed", true, tmp, src, size,
405405
result);
406406
return OPAL_ERROR;
407407
}
408-
result = opal_accelerator_cuda_func.cuMemcpyAsync((CUdeviceptr) dest, tmp, size, opal_accelerator_cuda_memcpy_stream);
408+
result = cuMemcpyAsync((CUdeviceptr) dest, tmp, size, opal_accelerator_cuda_memcpy_stream);
409409
if (OPAL_UNLIKELY(CUDA_SUCCESS != result)) {
410410
opal_show_help("help-accelerator-cuda.txt", "cuMemcpyAsync failed", true, dest, tmp,
411411
size, result);
412412
return OPAL_ERROR;
413413
}
414-
result = opal_accelerator_cuda_func.cuStreamSynchronize(opal_accelerator_cuda_memcpy_stream);
414+
result = cuStreamSynchronize(opal_accelerator_cuda_memcpy_stream);
415415
if (OPAL_UNLIKELY(CUDA_SUCCESS != result)) {
416416
opal_show_help("help-accelerator-cuda.txt", "cuStreamSynchronize failed", true,
417417
OPAL_PROC_MY_HOSTNAME, result);
418418
return OPAL_ERROR;
419419
}
420-
opal_accelerator_cuda_func.cuMemFree(tmp);
420+
cuMemFree(tmp);
421421
return OPAL_SUCCESS;
422422
}
423423

@@ -430,7 +430,7 @@ static int accelerator_cuda_mem_alloc(int dev_id, void **ptr, size_t size)
430430
}
431431

432432
if (size > 0) {
433-
result = opal_accelerator_cuda_func.cuMemAlloc((CUdeviceptr *) ptr, size);
433+
result = cuMemAlloc((CUdeviceptr *) ptr, size);
434434
if (OPAL_UNLIKELY(CUDA_SUCCESS != result)) {
435435
opal_show_help("help-accelerator-cuda.txt", "cuMemAlloc failed", true,
436436
OPAL_PROC_MY_HOSTNAME, result);
@@ -444,7 +444,7 @@ static int accelerator_cuda_mem_release(int dev_id, void *ptr)
444444
{
445445
CUresult result;
446446
if (NULL != ptr) {
447-
result = opal_accelerator_cuda_func.cuMemFree((CUdeviceptr) ptr);
447+
result = cuMemFree((CUdeviceptr) ptr);
448448
if (OPAL_UNLIKELY(CUDA_SUCCESS != result)) {
449449
opal_show_help("help-accelerator-cuda.txt", "cuMemFree failed", true,
450450
OPAL_PROC_MY_HOSTNAME, result);
@@ -463,7 +463,7 @@ static int accelerator_cuda_get_address_range(int dev_id, const void *ptr, void
463463
return OPAL_ERR_BAD_PARAM;
464464
}
465465

466-
result = opal_accelerator_cuda_func.cuMemGetAddressRange((CUdeviceptr *) base, size, (CUdeviceptr) ptr);
466+
result = cuMemGetAddressRange((CUdeviceptr *) base, size, (CUdeviceptr) ptr);
467467
if (OPAL_UNLIKELY(CUDA_SUCCESS != result)) {
468468
opal_show_help("help-accelerator-cuda.txt", "cuMemGetAddressRange failed 2", true,
469469
OPAL_PROC_MY_HOSTNAME, result, ptr);
@@ -483,7 +483,7 @@ static int accelerator_cuda_host_register(int dev_id, void *ptr, size_t size)
483483
return OPAL_ERR_BAD_PARAM;
484484
}
485485

486-
result = opal_accelerator_cuda_func.cuMemHostRegister(ptr, size, 0);
486+
result = cuMemHostRegister(ptr, size, 0);
487487
if (OPAL_UNLIKELY(CUDA_SUCCESS != result)) {
488488
opal_show_help("help-accelerator-cuda.txt", "cuMemHostRegister failed", true,
489489
ptr, size, OPAL_PROC_MY_HOSTNAME, result);
@@ -497,7 +497,7 @@ static int accelerator_cuda_host_unregister(int dev_id, void *ptr)
497497
{
498498
CUresult result;
499499
if (NULL != ptr) {
500-
result = opal_accelerator_cuda_func.cuMemHostUnregister(ptr);
500+
result = cuMemHostUnregister(ptr);
501501
if (OPAL_UNLIKELY(CUDA_SUCCESS != result)) {
502502
opal_show_help("help-accelerator-cuda.txt", "cuMemHostUnregister failed", true,
503503
ptr, OPAL_PROC_MY_HOSTNAME, result);
@@ -516,7 +516,7 @@ static int accelerator_cuda_get_device(int *dev_id)
516516
return OPAL_ERR_BAD_PARAM;
517517
}
518518

519-
result = opal_accelerator_cuda_func.cuCtxGetDevice(&cuDev);
519+
result = cuCtxGetDevice(&cuDev);
520520
if (OPAL_UNLIKELY(CUDA_SUCCESS != result)) {
521521
opal_show_help("help-accelerator-cuda.txt", "cuCtxGetDevice failed", true,
522522
result);
@@ -534,7 +534,7 @@ static int accelerator_cuda_device_can_access_peer(int *access, int dev1, int de
534534
return OPAL_ERR_BAD_PARAM;
535535
}
536536

537-
result = opal_accelerator_cuda_func.cuDeviceCanAccessPeer(access, (CUdevice) dev1, (CUdevice) dev2);
537+
result = cuDeviceCanAccessPeer(access, (CUdevice) dev1, (CUdevice) dev2);
538538
if (OPAL_UNLIKELY(CUDA_SUCCESS != result)) {
539539
opal_show_help("help-accelerator-cuda.txt", "cuDeviceCanAccessPeer failed", true,
540540
OPAL_PROC_MY_HOSTNAME, result);
@@ -554,13 +554,13 @@ static int accelerator_cuda_get_buffer_id(int dev_id, const void *addr, opal_acc
554554
{
555555
CUresult result;
556556
int enable = 1;
557-
result = opal_accelerator_cuda_func.cuPointerGetAttribute((unsigned long long *)buf_id, CU_POINTER_ATTRIBUTE_BUFFER_ID, (CUdeviceptr) addr);
557+
result = cuPointerGetAttribute((unsigned long long *)buf_id, CU_POINTER_ATTRIBUTE_BUFFER_ID, (CUdeviceptr) addr);
558558
if (OPAL_UNLIKELY(result != CUDA_SUCCESS)) {
559559
opal_show_help("help-accelerator-cuda.txt", "bufferID failed", true, OPAL_PROC_MY_HOSTNAME,
560560
result);
561561
return result;
562562
}
563-
result = opal_accelerator_cuda_func.cuPointerSetAttribute(&enable, CU_POINTER_ATTRIBUTE_SYNC_MEMOPS,
563+
result = cuPointerSetAttribute(&enable, CU_POINTER_ATTRIBUTE_SYNC_MEMOPS,
564564
(CUdeviceptr) addr);
565565
if (OPAL_UNLIKELY(CUDA_SUCCESS != result)) {
566566
opal_show_help("help-accelerator-cuda.txt", "cuPointerSetAttribute failed", true,

0 commit comments

Comments
 (0)