Skip to content

Commit 8dc8003

Browse files
committed
accelerator/rocm: minor updates
Two minor updates: - exclude a header file that is not required anymore - change the default option to use (blocking) hipMemcpy instead of hipMemcpyAsync, since it seems to perform better on most ROCm releases that we care about. Signed-off-by: Edgar Gabriel <Edgar.Gabriel@amd.com>
1 parent a0e9b01 commit 8dc8003

File tree

1 file changed

+4
-5
lines changed

1 file changed

+4
-5
lines changed

opal/mca/accelerator/rocm/accelerator_rocm_component.c

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
* reserved.
77
* Copyright (c) 2017-2022 Amazon.com, Inc. or its affiliates.
88
* All Rights reserved.
9-
* Copyright (c) 2022 Advanced Micro Devices, Inc. All Rights reserved.
9+
* Copyright (c) 2022-2024 Advanced Micro Devices, Inc. All Rights reserved.
1010
* $COPYRIGHT$
1111
*
1212
* Additional copyrights may follow
@@ -17,13 +17,12 @@
1717
#include "opal_config.h"
1818

1919
#include <stdio.h>
20-
#include <dlfcn.h>
2120

2221
#include "opal/mca/dl/base/base.h"
2322
#include "opal/runtime/opal_params.h"
2423
#include "accelerator_rocm.h"
2524

26-
int opal_accelerator_rocm_memcpy_async = 1;
25+
int opal_accelerator_rocm_memcpy_async = 0;
2726
int opal_accelerator_rocm_verbose = 0;
2827
size_t opal_accelerator_rocm_memcpyD2H_limit=1024;
2928
size_t opal_accelerator_rocm_memcpyH2D_limit=1048576;
@@ -149,9 +148,9 @@ static int accelerator_rocm_component_register(void)
149148
&opal_accelerator_rocm_memcpyH2D_limit);
150149

151150
/* Use this flag to test async vs sync copies */
152-
opal_accelerator_rocm_memcpy_async = 1;
151+
opal_accelerator_rocm_memcpy_async = 0;
153152
(void) mca_base_var_register("ompi", "mpi", "accelerator_rocm", "memcpy_async",
154-
"Set to 0 to force using hipMemcpy instead of hipMemcpyAsync",
153+
"Set to 1 to force using hipMemcpyAsync instead of hipMemcpy",
155154
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0, OPAL_INFO_LVL_9,
156155
MCA_BASE_VAR_SCOPE_READONLY, &opal_accelerator_rocm_memcpy_async);
157156

0 commit comments

Comments
 (0)