Skip to content

Commit 4588377

Browse files
authored
Merge pull request #10346 from jjhursey/fix-prot-finalize
hook/comm_method: Fix segv when not fully connected
2 parents 51af452 + 056b742 commit 4588377

File tree

3 files changed

+30
-23
lines changed

3 files changed

+30
-23
lines changed

ompi/mca/hook/comm_method/hook_comm_method.h

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (c) 2016-2020 IBM Corporation. All rights reserved.
2+
* Copyright (c) 2016-2022 IBM Corporation. All rights reserved.
33
* $COPYRIGHT$
44
*
55
* Additional copyrights may follow
@@ -18,14 +18,14 @@
1818

1919
BEGIN_C_DECLS
2020

21-
OMPI_MODULE_DECLSPEC extern const ompi_hook_base_component_1_0_0_t mca_hook_comm_method_component;
21+
OMPI_MODULE_DECLSPEC extern ompi_hook_base_component_1_0_0_t mca_hook_comm_method_component;
2222

2323
extern int mca_hook_comm_method_verbose;
2424
extern int mca_hook_comm_method_output;
2525
extern bool mca_hook_comm_method_enable_mpi_init;
2626
extern bool mca_hook_comm_method_enable_mpi_finalize;
2727
extern int mca_hook_comm_method_max;
28-
extern int mca_hook_comm_method_brief;
28+
extern bool mca_hook_comm_method_brief;
2929
extern char *mca_hook_comm_method_fakefile;
3030

3131
void ompi_hook_comm_method_mpi_init_bottom(int argc, char **argv, int requested, int *provided);

ompi/mca/hook/comm_method/hook_comm_method_component.c

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (c) 2016-2020 IBM Corporation. All rights reserved.
2+
* Copyright (c) 2016-2022 IBM Corporation. All rights reserved.
33
* $COPYRIGHT$
44
*
55
* Additional copyrights may follow
@@ -25,7 +25,7 @@ const char *mca_hook_comm_method_component_version_string =
2525
* Instantiate the public struct with all of our public information
2626
* and pointers to our public functions in it
2727
*/
28-
const ompi_hook_base_component_1_0_0_t mca_hook_comm_method_component = {
28+
ompi_hook_base_component_1_0_0_t mca_hook_comm_method_component = {
2929

3030
/* First, the mca_component_t struct containing meta information
3131
* about the component itself */
@@ -79,7 +79,7 @@ bool mca_hook_comm_method_enable_mpi_init = false;
7979
bool mca_hook_comm_method_enable_mpi_finalize = false;
8080
uint32_t mca_hook_comm_method_enabled_flags = 0x00;
8181
int mca_hook_comm_method_max = 12;
82-
int mca_hook_comm_method_brief = 0;
82+
bool mca_hook_comm_method_brief = false;
8383
char *mca_hook_comm_method_fakefile = NULL;
8484

8585
static mca_base_var_enum_value_flag_t mca_hook_comm_method_modes[] = {
@@ -174,7 +174,7 @@ static int ompi_hook_comm_method_component_register(void)
174174
// hook_comm_method_brief
175175
(void) mca_base_component_var_register(&mca_hook_comm_method_component.hookm_version, "brief",
176176
"Only print the comm method summary, skip the 2d table.",
177-
MCA_BASE_VAR_TYPE_INT, NULL,
177+
MCA_BASE_VAR_TYPE_BOOL, NULL,
178178
0, 0,
179179
OPAL_INFO_LVL_3,
180180
MCA_BASE_VAR_SCOPE_READONLY,

ompi/mca/hook/comm_method/hook_comm_method_fns.c

Lines changed: 23 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (c) 2016-2020 IBM Corporation. All rights reserved.
2+
* Copyright (c) 2016-2022 IBM Corporation. All rights reserved.
33
* $COPYRIGHT$
44
*
55
* Additional copyrights may follow
@@ -22,11 +22,6 @@
2222
#include "ompi/mca/bml/base/base.h"
2323
#include "ompi/mca/mtl/base/base.h"
2424

25-
static void
26-
mystrncpy(char *to, const char *from, int n) {
27-
snprintf(to, n, "%s", from);
28-
}
29-
3025
// For converting comm_method strings to comm_method id# and back.
3126
// This starts as our local set of strings, but gets Allreduced into
3227
// a global mapping so all the strings at all the ranks are represented.
@@ -44,6 +39,9 @@ static comm_method_string_conversion_t comm_method_string_conversion;
4439
#define MODE_IS_MTL 2
4540
#define MODE_IS_BTL 3
4641

42+
#define CALLED_FROM_MPI_INIT 1
43+
#define CALLED_FROM_MPI_FINALIZE 2
44+
4745
// ----------------------------------------------------------------------------
4846

4947
// return the pml's module:component:name function pointer in fp
@@ -64,6 +62,10 @@ static char*
6462
lookup_btl_name_for_send(ompi_communicator_t* comm, int rank) {
6563
ompi_proc_t *dst_proc = ompi_group_peer_lookup_existing(comm->c_remote_group, rank);
6664

65+
if (NULL == dst_proc) {
66+
return NULL;
67+
}
68+
6769
mca_bml_base_endpoint_t* endpoint = mca_bml_base_get_endpoint(dst_proc);
6870
if (endpoint &&
6971
endpoint->btl_send.bml_btls &&
@@ -84,22 +86,27 @@ lookup_btl_name_for_send(ompi_communicator_t* comm, int rank) {
8486
// that the caller has to free.
8587
static char *
8688
comm_method_string(MPI_Comm comm, int rank, int *comm_mode) {
87-
char *p;
89+
char *p, *btl;
8890
char *string = malloc(COMM_METHOD_STRING_SIZE);
8991

9092
if (!string) { return NULL; }
9193

9294
p = lookup_pml_name();
9395
if (p && 0==strncmp("ob1", p, 4)) { // BTL
9496
if (comm_mode) { *comm_mode = MODE_IS_BTL; }
95-
mystrncpy(string, lookup_btl_name_for_send(comm, rank), COMM_METHOD_STRING_SIZE);
97+
btl = lookup_btl_name_for_send(comm, rank);
98+
if (NULL == btl) {
99+
strncpy(string, "n/a", COMM_METHOD_STRING_SIZE);
100+
} else {
101+
strncpy(string, btl, COMM_METHOD_STRING_SIZE);
102+
}
96103
}
97104
else if (p && 0==strncmp("cm", p, 3)) { // MTL
98105
if (comm_mode) { *comm_mode = MODE_IS_MTL; }
99-
mystrncpy(string, lookup_mtl_name(), COMM_METHOD_STRING_SIZE);
106+
strncpy(string, lookup_mtl_name(), COMM_METHOD_STRING_SIZE);
100107
} else { // PML
101108
if (comm_mode) { *comm_mode = MODE_IS_PML; }
102-
mystrncpy(string, p, COMM_METHOD_STRING_SIZE);
109+
strncpy(string, p, COMM_METHOD_STRING_SIZE);
103110
}
104111
return string;
105112
}
@@ -144,12 +151,12 @@ static void
144151
add_string_to_conversion_struct(comm_method_string_conversion_t *data, char *string)
145152
{
146153
int i;
147-
if (0 == strcmp(string, "n/a")) { return; }
154+
if (NULL == string || 0 == strcmp(string, "n/a")) { return; }
148155

149156
i = lookup_string_in_conversion_struct(data, string);
150157
if (i == 0) { // didn't find string in list, so add it
151158
if (data->n < MAX_COMM_METHODS) {
152-
mystrncpy(data->str[data->n], string, COMM_METHOD_STRING_SIZE);
159+
strncpy(data->str[data->n], string, COMM_METHOD_STRING_SIZE);
153160
++(data->n);
154161
}
155162
}
@@ -214,14 +221,14 @@ static void ompi_report_comm_methods(int called_from_location);
214221
void ompi_hook_comm_method_mpi_init_bottom(int argc, char **argv, int requested, int *provided)
215222
{
216223
if( mca_hook_comm_method_enable_mpi_init ) {
217-
ompi_report_comm_methods( 1 );
224+
ompi_report_comm_methods( CALLED_FROM_MPI_INIT );
218225
}
219226
}
220227

221228
void ompi_hook_comm_method_mpi_finalize_top(void)
222229
{
223230
if( mca_hook_comm_method_enable_mpi_finalize ) {
224-
ompi_report_comm_methods( 2 );
231+
ompi_report_comm_methods( CALLED_FROM_MPI_FINALIZE );
225232
}
226233
}
227234

@@ -312,7 +319,7 @@ abbreviate_list_into_string(char *str, int max, int *list, int nlist)
312319
// When activated from init: we establish connections before printing.
313320
// When activated from finalize: we just print whatever info is available.
314321
static void
315-
ompi_report_comm_methods(int called_from_location) // 1 = from init, 2 = from finalize
322+
ompi_report_comm_methods(int called_from_location)
316323
{
317324
int numhosts, i, j, k;
318325
int max2Dprottable = 12;
@@ -416,7 +423,7 @@ ompi_report_comm_methods(int called_from_location) // 1 = from init, 2 = from fi
416423

417424
// If we're running during init, establish connections between all peers
418425
// (in leader_comm, which is all the ranks that are here at this point)
419-
if (called_from_location == 1) {
426+
if (CALLED_FROM_MPI_INIT == called_from_location) {
420427
for (i=0; i<=nleaderranks/2; ++i) {
421428
// (Examples to show why the loop is i<=nleaderranks/2)
422429
// np4 : 0 1 2 3 i=0 0c0 i=1 0c0&1&3 i=2 0c0&1&3&2

0 commit comments

Comments
 (0)