Skip to content

Commit 3cd85a9

Browse files
committed
Add the initial_errhandler info key to MPI_INFO_ENV and populate the
value from prun populated parameters Signed-off-by: Aurélien Bouteiller <bouteill@icl.utk.edu> Allow errhandlers to invoke the initial error handler before MPI_INIT Signed-off-by: Aurelien Bouteiller <bouteill@icl.utk.edu> Indentation Signed-off-by: Aurelien Bouteiller <bouteill@icl.utk.edu>
1 parent 703b8c3 commit 3cd85a9

File tree

7 files changed

+95
-10
lines changed

7 files changed

+95
-10
lines changed

ompi/errhandler/errhandler.c

Lines changed: 55 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -78,6 +78,55 @@ ompi_predefined_errhandler_t ompi_mpi_errors_throw_exceptions = {{{0}}};
7878
ompi_predefined_errhandler_t *ompi_mpi_errors_throw_exceptions_addr =
7979
&ompi_mpi_errors_throw_exceptions;
8080

81+
static opal_mutex_t errhandler_init_lock = OPAL_MUTEX_STATIC_INIT;
82+
ompi_errhandler_t* ompi_initial_error_handler_eh = NULL;
83+
void (*ompi_initial_error_handler)(struct ompi_communicator_t **comm, int *error_code, ...) = NULL;
84+
85+
/*
86+
* Initialize the initial errhandler infrastructure only.
87+
* This does not allocate any memory and does not require a corresponding fini.
*
* The handler is chosen from the OMPI_MCA_mpi_initial_errhandler environment
* variable, compared case-insensitively against "mpi_errors_are_fatal",
* "mpi_errors_abort" and "mpi_errors_return". When the variable is unset,
* or holds any other value (a warning is printed in that case),
* mpi_errors_are_fatal is selected.
*
* The selection is performed while holding errhandler_init_lock. If
* ompi_initial_error_handler is already non-NULL the function returns
* immediately without changing anything.
*
* Both ompi_initial_error_handler (the function pointer) and
* ompi_initial_error_handler_eh (the errhandler object pointer) are set
* together so they always designate the same predefined handler.
*
* Returns OMPI_SUCCESS on every path.
88+
*/
89+
int ompi_initial_errhandler_init(void) {
90+
opal_mutex_lock(&errhandler_init_lock);
91+
if ( NULL != ompi_initial_error_handler ) {
92+
/* Already initialized (presumably by an API call before MPI_init) */
93+
opal_mutex_unlock(&errhandler_init_lock);
94+
return OMPI_SUCCESS;
95+
}
96+
97+
/* If it has been requested from the launch keys, set the initial
98+
* error handler that will be attached by default with predefined
99+
* communicators. We use an env because that can be obtained before
100+
* OPAL and PMIx initialization.
101+
*/
102+
char *env = getenv("OMPI_MCA_mpi_initial_errhandler");
103+
if( NULL != env ) {
104+
if( 0 == strcasecmp(env, "mpi_errors_are_fatal") ) {
105+
ompi_initial_error_handler = &ompi_mpi_errors_are_fatal_comm_handler;
106+
ompi_initial_error_handler_eh = &ompi_mpi_errors_are_fatal.eh;
107+
}
108+
else if( 0 == strcasecmp(env, "mpi_errors_abort") ) {
109+
ompi_initial_error_handler = &ompi_mpi_errors_abort_comm_handler;
110+
ompi_initial_error_handler_eh = &ompi_mpi_errors_abort.eh;
111+
}
112+
else if( 0 == strcasecmp(env, "mpi_errors_return") ) {
113+
ompi_initial_error_handler = &ompi_mpi_errors_return_comm_handler;
114+
ompi_initial_error_handler_eh = &ompi_mpi_errors_return.eh;
115+
}
116+
else {
117+
/* invalid entry detected, ignore it, set fatal by default */
118+
opal_output(0, "WARNING: invalid value for launch key 'mpi_initial_errhandler'; defaulting to 'mpi_errors_are_fatal'.");
119+
ompi_initial_error_handler = &ompi_mpi_errors_are_fatal_comm_handler;
120+
ompi_initial_error_handler_eh = &ompi_mpi_errors_are_fatal.eh;
121+
}
122+
}
123+
else {
/* environment variable not set: fall back to mpi_errors_are_fatal */
124+
ompi_initial_error_handler = &ompi_mpi_errors_are_fatal_comm_handler;
125+
ompi_initial_error_handler_eh = &ompi_mpi_errors_are_fatal.eh;
126+
}
127+
opal_mutex_unlock(&errhandler_init_lock);
128+
return OMPI_SUCCESS;
129+
}
81130

82131
/*
83132
* Initialize OMPI errhandler infrastructure
@@ -163,9 +212,12 @@ int ompi_errhandler_init(void)
163212
"MPI_ERRORS_THROW_EXCEPTIONS",
164213
sizeof(ompi_mpi_errors_throw_exceptions.eh.eh_name));
165214

166-
/* All done */
167-
168-
return OMPI_SUCCESS;
215+
/* Let's initialize the initial error handler if not already done */
216+
char *env = getenv("OMPI_MCA_mpi_initial_errhandler");
217+
if( NULL != env ) {
218+
ompi_process_info.initial_errhandler = strndup(env, MPI_MAX_INFO_VAL);
219+
}
220+
return ompi_initial_errhandler_init();
169221
}
170222

171223

ompi/errhandler/errhandler.h

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -185,6 +185,26 @@ OMPI_DECLSPEC extern ompi_predefined_errhandler_t ompi_mpi_errors_throw_exceptio
185185
*/
186186
OMPI_DECLSPEC extern opal_pointer_array_t ompi_errhandler_f_to_c_table;
187187

188+
/**
189+
* This function selects the initial error handler.
190+
* It may be called during MPI_INIT, or during the first MPI call
191+
* that raises an error. This function does not allocate memory,
192+
* and will only populate the ompi_initial_error_handler_eh and
193+
* ompi_initial_error_handler pointers with predefined error handler
194+
* and error handler function aliases.
195+
*/
196+
OMPI_DECLSPEC int ompi_initial_errhandler_init(void);
197+
/**
198+
* The initial error handler pointer. Will be set to alias one of the
199+
* predefined error handlers through launch keys during the first MPI call,
200+
* and will then be attached to predefined communicators.
201+
*/
202+
OMPI_DECLSPEC extern ompi_errhandler_t* ompi_initial_error_handler_eh;
203+
/**
204+
* The initial error handler function pointer. Will be called when an error
205+
* is raised before MPI_INIT or after MPI_FINALIZE.
206+
*/
207+
OMPI_DECLSPEC extern void (*ompi_initial_error_handler)(struct ompi_communicator_t **comm, int *error_code, ...);
188208

189209
/**
190210
* Forward declaration so that we don't have to include

ompi/info/info.c

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -131,6 +131,12 @@ int ompi_mpiinfo_init(void)
131131
opal_info_set(&ompi_mpi_info_env.info.super, "soft", cptr);
132132
free(cptr);
133133

134+
/* the initial error handler, set it as requested (nothing if not
135+
* requested) */
136+
if (NULL != ompi_process_info.initial_errhandler) {
137+
opal_info_set(&ompi_mpi_info_env.info.super, "mpi_initial_errhandler", ompi_process_info.initial_errhandler);
138+
}
139+
134140
/* local host name */
135141
opal_info_set(&ompi_mpi_info_env.info.super, "host", ompi_process_info.nodename);
136142

ompi/runtime/ompi_mpi_init.c

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -751,12 +751,6 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided,
751751
goto error;
752752
}
753753

754-
/* initialize info */
755-
if (OMPI_SUCCESS != (ret = ompi_mpiinfo_init())) {
756-
error = "ompi_info_init() failed";
757-
goto error;
758-
}
759-
760754
/* initialize error handlers */
761755
if (OMPI_SUCCESS != (ret = ompi_errhandler_init())) {
762756
error = "ompi_errhandler_init() failed";
@@ -775,6 +769,12 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided,
775769
goto error;
776770
}
777771

772+
/* initialize info */
773+
if (OMPI_SUCCESS != (ret = ompi_mpiinfo_init())) {
774+
error = "ompi_info_init() failed";
775+
goto error;
776+
}
777+
778778
/* initialize groups */
779779
if (OMPI_SUCCESS != (ret = ompi_group_init())) {
780780
error = "ompi_group_init() failed";

ompi/runtime/ompi_rte.c

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -935,6 +935,11 @@ int ompi_rte_finalize(void)
935935
opal_process_info.initial_wdir = NULL;
936936
}
937937

938+
if (NULL != opal_process_info.initial_errhandler) {
939+
free(opal_process_info.initial_errhandler);
940+
opal_process_info.initial_errhandler = NULL;
941+
}
942+
938943
/* cleanup our internal nspace hack */
939944
opal_pmix_finalize_nspace_tracker();
940945

opal/util/proc.c

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,8 @@ opal_process_info_t opal_process_info = {
5151
.num_apps = 0,
5252
.initial_wdir = NULL,
5353
.reincarnation = 0,
54-
.proc_is_bound = false
54+
.proc_is_bound = false,
55+
.initial_errhandler = NULL,
5556
};
5657

5758
static opal_proc_t opal_local_proc = {

opal/util/proc.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -126,6 +126,7 @@ typedef struct opal_process_info_t {
126126
char *initial_wdir;
127127
uint32_t reincarnation;
128128
bool proc_is_bound;
129+
char *initial_errhandler;
129130
} opal_process_info_t;
130131
OPAL_DECLSPEC extern opal_process_info_t opal_process_info;
131132

0 commit comments

Comments
 (0)