Skip to content

Commit 5d91862

Browse files
authored
Merge pull request #11613 from PedramAlizadeh/main
Added lazy initialization of rocm component
2 parents a93237b + 8933311 commit 5d91862

File tree

3 files changed

+44
-7
lines changed

3 files changed

+44
-7
lines changed

opal/mca/accelerator/rocm/accelerator_rocm.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@
3434

3535

3636
#include "opal/mca/accelerator/accelerator.h"
37+
#include "opal/mca/threads/mutex.h"
3738

3839
typedef struct {
3940
opal_accelerator_base_component_t super;
@@ -60,4 +61,6 @@ OPAL_DECLSPEC extern int opal_accelerator_rocm_verbose;
6061
OPAL_DECLSPEC extern size_t opal_accelerator_rocm_memcpyH2D_limit;
6162
OPAL_DECLSPEC extern size_t opal_accelerator_rocm_memcpyD2H_limit;
6263

64+
OPAL_DECLSPEC extern int opal_accelerator_rocm_lazy_init(void);
65+
6366
#endif

opal/mca/accelerator/rocm/accelerator_rocm_component.c

Lines changed: 39 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,10 @@ int opal_accelerator_rocm_verbose = 0;
2828
size_t opal_accelerator_rocm_memcpyD2H_limit=1024;
2929
size_t opal_accelerator_rocm_memcpyH2D_limit=1048576;
3030

31+
/* Initialization lock for lazy rocm initialization */
32+
static opal_mutex_t accelerator_rocm_init_lock;
33+
static bool accelerator_rocm_init_complete = false;
34+
3135
hipStream_t opal_accelerator_rocm_MemcpyStream = NULL;
3236

3337
/*
@@ -154,8 +158,42 @@ static int accelerator_rocm_component_register(void)
154158
return OPAL_SUCCESS;
155159
}
156160

161+
int opal_accelerator_rocm_lazy_init()
162+
{
163+
int err = OPAL_SUCCESS;
164+
165+
/* Double checked locking to avoid having to
166+
* grab locks post lazy-initialization. */
167+
opal_atomic_rmb();
168+
if (true == accelerator_rocm_init_complete) {
169+
return OPAL_SUCCESS;
170+
}
171+
OPAL_THREAD_LOCK(&accelerator_rocm_init_lock);
172+
173+
/* If already initialized, just exit */
174+
if (true == accelerator_rocm_init_complete) {
175+
goto out;
176+
}
177+
178+
err = hipStreamCreate(&opal_accelerator_rocm_MemcpyStream);
179+
if (hipSuccess != err) {
180+
opal_output(0, "Could not create hipStream, err=%d %s\n",
181+
err, hipGetErrorString(err));
182+
goto out;
183+
}
184+
185+
err = OPAL_SUCCESS;
186+
opal_atomic_wmb();
187+
accelerator_rocm_init_complete = true;
188+
out:
189+
OPAL_THREAD_UNLOCK(&accelerator_rocm_init_lock);
190+
return err;
191+
}
192+
157193
static opal_accelerator_base_module_t* accelerator_rocm_init(void)
158194
{
195+
OBJ_CONSTRUCT(&accelerator_rocm_init_lock, opal_mutex_t);
196+
159197
hipError_t err;
160198

161199
if (opal_rocm_runtime_initialized) {
@@ -169,13 +207,6 @@ static opal_accelerator_base_module_t* accelerator_rocm_init(void)
169207
return NULL;
170208
}
171209

172-
err = hipStreamCreate(&opal_accelerator_rocm_MemcpyStream);
173-
if (hipSuccess != err) {
174-
opal_output(0, "Could not create hipStream, err=%d %s\n",
175-
err, hipGetErrorString(err));
176-
return NULL;
177-
}
178-
179210
opal_atomic_mb();
180211
opal_rocm_runtime_initialized = true;
181212

@@ -192,5 +223,6 @@ static void accelerator_rocm_finalize(opal_accelerator_base_module_t* module)
192223
opal_accelerator_rocm_MemcpyStream = NULL;
193224
}
194225

226+
OBJ_DESTRUCT(&accelerator_rocm_init_lock);
195227
return;
196228
}

opal/mca/accelerator/rocm/accelerator_rocm_module.c

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -87,6 +87,8 @@ static int mca_accelerator_rocm_check_addr (const void *addr, int *dev_id, uint6
8787
//We might want to set additional flags in a later iteration.
8888
//*flags |= MCA_ACCELERATOR_FLAGS_HOST_LDSTR;
8989
//*flags |= MCA_ACCELERATOR_FLAGS_HOST_ATOMICS;
90+
/* First access on a device pointer triggers ROCM support lazy initialization. */
91+
opal_accelerator_rocm_lazy_init();
9092
ret = 1;
9193
} else if (hipMemoryTypeUnified == srcAttr.memoryType) {
9294
*flags |= MCA_ACCELERATOR_FLAGS_UNIFIED_MEMORY;

0 commit comments

Comments
 (0)