Skip to content

Commit 816acbd

Browse files
authored
Merge pull request #7840 from abouteiller/mpi-next/init-errh
MPI-4: Initial error handler
2 parents 60aa97b + 7118755 commit 816acbd

File tree

16 files changed

+469
-94
lines changed

16 files changed

+469
-94
lines changed

ompi/communicator/comm_init.c

Lines changed: 10 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
44
* University Research and Technology
55
* Corporation. All rights reserved.
6-
* Copyright (c) 2004-2017 The University of Tennessee and The University
6+
* Copyright (c) 2004-2020 The University of Tennessee and The University
77
* of Tennessee Research Foundation. All rights
88
* reserved.
99
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
@@ -135,8 +135,8 @@ int ompi_comm_init(void)
135135
ompi_mpi_comm_world.comm.c_remote_group = group;
136136
OBJ_RETAIN(ompi_mpi_comm_world.comm.c_remote_group);
137137
ompi_mpi_comm_world.comm.c_cube_dim = opal_cube_dim((int)size);
138-
ompi_mpi_comm_world.comm.error_handler = &ompi_mpi_errors_are_fatal.eh;
139-
OBJ_RETAIN( &ompi_mpi_errors_are_fatal.eh );
138+
ompi_mpi_comm_world.comm.error_handler = ompi_initial_error_handler_eh;
139+
OBJ_RETAIN( ompi_mpi_comm_world.comm.error_handler );
140140
OMPI_COMM_SET_PML_ADDED(&ompi_mpi_comm_world.comm);
141141
opal_pointer_array_set_item (&ompi_mpi_communicators, 0, &ompi_mpi_comm_world);
142142

@@ -188,8 +188,8 @@ int ompi_comm_init(void)
188188
ompi_mpi_comm_self.comm.c_local_group = group;
189189
ompi_mpi_comm_self.comm.c_remote_group = group;
190190
OBJ_RETAIN(ompi_mpi_comm_self.comm.c_remote_group);
191-
ompi_mpi_comm_self.comm.error_handler = &ompi_mpi_errors_are_fatal.eh;
192-
OBJ_RETAIN( &ompi_mpi_errors_are_fatal.eh );
191+
ompi_mpi_comm_self.comm.error_handler = ompi_initial_error_handler_eh;
192+
OBJ_RETAIN( ompi_mpi_comm_self.comm.error_handler );
193193
OMPI_COMM_SET_PML_ADDED(&ompi_mpi_comm_self.comm);
194194
opal_pointer_array_set_item (&ompi_mpi_communicators, 1, &ompi_mpi_comm_self);
195195

@@ -214,8 +214,10 @@ int ompi_comm_init(void)
214214
ompi_mpi_comm_null.comm.c_contextid = 2;
215215
ompi_mpi_comm_null.comm.c_my_rank = MPI_PROC_NULL;
216216

217+
/* unlike world, self, and parent, comm_null does not inherit the initial error
218+
* handler */
217219
ompi_mpi_comm_null.comm.error_handler = &ompi_mpi_errors_are_fatal.eh;
218-
OBJ_RETAIN( &ompi_mpi_errors_are_fatal.eh );
220+
OBJ_RETAIN( ompi_mpi_comm_null.comm.error_handler );
219221
opal_pointer_array_set_item (&ompi_mpi_communicators, 2, &ompi_mpi_comm_null);
220222

221223
opal_string_copy(ompi_mpi_comm_null.comm.c_name, "MPI_COMM_NULL",
@@ -228,6 +230,8 @@ int ompi_comm_init(void)
228230
OBJ_RETAIN(&ompi_mpi_comm_null);
229231
OBJ_RETAIN(&ompi_mpi_group_null.group);
230232
OBJ_RETAIN(&ompi_mpi_errors_are_fatal.eh);
233+
/* During dyn_init, the comm_parent error handler will be set to the same
234+
* as comm_world (thus, the initial error handler). */
231235

232236
/* initialize communicator requests (for ompi_comm_idup) */
233237
ompi_comm_request_init ();

ompi/dpm/dpm.c

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -782,6 +782,7 @@ int ompi_dpm_spawn(int count, const char *array_of_commands[],
782782
int flag=0;
783783
char cwd[OPAL_PATH_MAX];
784784
char host[OPAL_MAX_INFO_VAL]; /*** should define OMPI_HOST_MAX ***/
785+
char init_errh[OPAL_MAX_INFO_VAL];
785786
char prefix[OPAL_MAX_INFO_VAL];
786787
char stdin_target[OPAL_MAX_INFO_VAL];
787788
char params[OPAL_MAX_INFO_VAL];
@@ -814,6 +815,7 @@ int ompi_dpm_spawn(int count, const char *array_of_commands[],
814815
- "file": filename, where additional information is provided.
815816
- "soft": see page 92 of MPI-2.
816817
- "host": desired host where to spawn the processes
818+
- "mpi_initial_errhandler": the error handler attached to predefined communicators.
817819
Non-standard keys:
818820
- "hostfile": hostfile containing hosts where procs are
819821
to be spawned
@@ -968,6 +970,15 @@ int ompi_dpm_spawn(int count, const char *array_of_commands[],
968970
}
969971
#endif
970972

973+
/* check for 'mpi_initial_errhandler' */
974+
ompi_info_get (array_of_info[i], "mpi_initial_errhandler", sizeof(init_errh) - 1, init_errh, &flag);
975+
if ( flag ) {
976+
/* this is set as an environment variable because it must be available
977+
* before pmix_init */
978+
opal_setenv("OMPI_MCA_mpi_initial_errhandler", init_errh, true, &app->env);
979+
continue;
980+
}
981+
971982
/* 'path', 'arch', 'file', 'soft' -- to be implemented */
972983

973984
/* non-standard keys

ompi/errhandler/errcode.c

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -131,8 +131,17 @@ do { \
131131
opal_pointer_array_set_item(&ompi_mpi_errcodes, (ERRCODE), &(VAR)); \
132132
} while (0)
133133

134+
static opal_mutex_t errcode_init_lock = OPAL_MUTEX_STATIC_INIT;
135+
134136
int ompi_mpi_errcode_init (void)
135137
{
138+
opal_mutex_lock(&errcode_init_lock);
139+
if ( 0 != ompi_mpi_errcode_lastpredefined ) {
140+
/* Already initialized (presumably by an API call before MPI_Init) */
141+
opal_mutex_unlock(&errcode_init_lock);
142+
return OMPI_SUCCESS;
143+
}
144+
136145
/* Initialize the pointer array, which will hold the references to
137146
the error objects */
138147
OBJ_CONSTRUCT(&ompi_mpi_errcodes, opal_pointer_array_t);
@@ -223,6 +232,7 @@ int ompi_mpi_errcode_init (void)
223232
MPI_ERR_LASTCODE. So just start it as == MPI_ERR_LASTCODE. */
224233
ompi_mpi_errcode_lastused = MPI_ERR_LASTCODE;
225234
ompi_mpi_errcode_lastpredefined = MPI_ERR_LASTCODE;
235+
opal_mutex_unlock(&errcode_init_lock);
226236
return OMPI_SUCCESS;
227237
}
228238

@@ -231,6 +241,7 @@ int ompi_mpi_errcode_finalize(void)
231241
int i;
232242
ompi_mpi_errcode_t *errc;
233243

244+
opal_mutex_lock(&errcode_init_lock);
234245
for (i=ompi_mpi_errcode_lastpredefined+1; i<=ompi_mpi_errcode_lastused; i++) {
235246
/*
236247
* there are some user defined error-codes, which
@@ -317,6 +328,8 @@ int ompi_mpi_errcode_finalize(void)
317328
OBJ_DESTRUCT(&ompi_t_err_invalid_name);
318329

319330
OBJ_DESTRUCT(&ompi_mpi_errcodes);
331+
ompi_mpi_errcode_lastpredefined = 0;
332+
opal_mutex_unlock(&errcode_init_lock);
320333
return OMPI_SUCCESS;
321334
}
322335

ompi/errhandler/errcode.h

Lines changed: 86 additions & 66 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
44
* University Research and Technology
55
* Corporation. All rights reserved.
6-
* Copyright (c) 2004-2007 The University of Tennessee and The University
6+
* Copyright (c) 2004-2020 The University of Tennessee and The University
77
* of Tennessee Research Foundation. All rights
88
* reserved.
99
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
@@ -55,11 +55,68 @@ OMPI_DECLSPEC extern int ompi_mpi_errcode_lastpredefined;
5555

5656
OMPI_DECLSPEC extern ompi_mpi_errcode_t ompi_err_unknown;
5757

58+
/**
59+
* Initialize the error codes
60+
*
61+
* @returns OMPI_SUCCESS Upon success
62+
* @returns OMPI_ERROR Otherwise
63+
*
64+
* Invoked from ompi_mpi_init(), or on first use by the inline accessors in this header; sets up all static MPI error codes.
65+
*/
66+
int ompi_mpi_errcode_init(void);
67+
68+
/**
69+
* Finalize the error codes.
70+
*
71+
* @returns OMPI_SUCCESS Always
72+
*
73+
* Invoked from ompi_mpi_finalize(); tears down the error code array.
74+
*/
75+
int ompi_mpi_errcode_finalize(void);
76+
77+
/**
78+
* Add an error code
79+
*
80+
* @param: error class to which this new error code belongs
81+
*
82+
* @returns the new error code on SUCCESS (>0)
83+
* @returns OMPI_ERROR otherwise
84+
*
85+
*/
86+
int ompi_mpi_errcode_add (int errclass);
87+
88+
/**
89+
* Add an error class
90+
*
91+
* @param: none
92+
*
93+
* @returns the new error class on SUCCESS (>0)
94+
* @returns OMPI_ERROR otherwise
95+
*
96+
*/
97+
int ompi_mpi_errclass_add (void);
98+
99+
/**
100+
* Add an error string to an error code
101+
*
102+
* @param: error code for which the string is defined
103+
* @param: error string to add
104+
* @param: length of the string
105+
*
106+
* @returns OMPI_SUCCESS on success
107+
* @returns OMPI_ERROR on error
108+
*/
109+
int ompi_mpi_errnum_add_string (int errnum, const char* string, int len);
110+
58111
/**
59112
* Check for a valid error code
60113
*/
61114
static inline bool ompi_mpi_errcode_is_invalid(int errcode)
62115
{
116+
if (OPAL_UNLIKELY( 0 == ompi_mpi_errcode_lastpredefined )) {
117+
ompi_mpi_errcode_init();
118+
}
119+
63120
if ( errcode >= 0 && errcode <= ompi_mpi_errcode_lastused )
64121
return 0;
65122
else
@@ -73,23 +130,31 @@ static inline int ompi_mpi_errcode_get_class (int errcode)
73130
{
74131
ompi_mpi_errcode_t *err = NULL;
75132

133+
if (OPAL_UNLIKELY( 0 == ompi_mpi_errcode_lastpredefined )) {
134+
ompi_mpi_errcode_init();
135+
}
136+
76137
if (errcode >= 0) {
77138
err = (ompi_mpi_errcode_t *)opal_pointer_array_get_item(&ompi_mpi_errcodes, errcode);
78139
/* If we get a bogus errcode, return MPI_ERR_UNKNOWN */
79140
}
80141

81142
if (NULL != err) {
82-
if ( err->code != MPI_UNDEFINED ) {
83-
return err->cls;
84-
}
143+
if ( err->code != MPI_UNDEFINED ) {
144+
return err->cls;
145+
}
85146
}
86147
return ompi_err_unknown.cls;
87148
}
88149

89150
static inline int ompi_mpi_errcode_is_predefined ( int errcode )
90151
{
152+
if (OPAL_UNLIKELY( 0 == ompi_mpi_errcode_lastpredefined )) {
153+
ompi_mpi_errcode_init();
154+
}
155+
91156
if ( errcode >= 0 && errcode <= ompi_mpi_errcode_lastpredefined )
92-
return true;
157+
return true;
93158

94159
return false;
95160
}
@@ -98,23 +163,27 @@ static inline int ompi_mpi_errnum_is_class ( int errnum )
98163
{
99164
ompi_mpi_errcode_t *err;
100165

166+
if (OPAL_UNLIKELY( 0 == ompi_mpi_errcode_lastpredefined )) {
167+
ompi_mpi_errcode_init();
168+
}
169+
101170
if (errnum < 0) {
102171
return false;
103172
}
104173

105174
if ( errnum <= ompi_mpi_errcode_lastpredefined ) {
106-
/* Predefined error values represent an error code and
107-
an error class at the same time */
108-
return true;
175+
/* Predefined error values represent an error code and
176+
an error class at the same time */
177+
return true;
109178
}
110179

111180
err = (ompi_mpi_errcode_t *)opal_pointer_array_get_item(&ompi_mpi_errcodes, errnum);
112181
if (NULL != err) {
113-
if ( MPI_UNDEFINED == err->code) {
114-
/* Distinction between error class and error code is that for the
115-
first one the code section is set to MPI_UNDEFINED */
116-
return true;
117-
}
182+
if ( MPI_UNDEFINED == err->code) {
183+
/* Distinction between error class and error code is that for the
184+
first one the code section is set to MPI_UNDEFINED */
185+
return true;
186+
}
118187
}
119188

120189
return false;
@@ -128,6 +197,10 @@ static inline char* ompi_mpi_errnum_get_string (int errnum)
128197
{
129198
ompi_mpi_errcode_t *err = NULL;
130199

200+
if (OPAL_UNLIKELY( 0 == ompi_mpi_errcode_lastpredefined )) {
201+
ompi_mpi_errcode_init();
202+
}
203+
131204
if (errnum >= 0) {
132205
err = (ompi_mpi_errcode_t *)opal_pointer_array_get_item(&ompi_mpi_errcodes, errnum);
133206
/* If we get a bogus errcode, return a string indicating that this
@@ -142,59 +215,6 @@ static inline char* ompi_mpi_errnum_get_string (int errnum)
142215
}
143216

144217

145-
/**
146-
* Initialize the error codes
147-
*
148-
* @returns OMPI_SUCCESS Upon success
149-
* @returns OMPI_ERROR Otherwise
150-
*
151-
* Invoked from ompi_mpi_init(); sets up all static MPI error codes,
152-
*/
153-
int ompi_mpi_errcode_init(void);
154-
155-
/**
156-
* Finalize the error codes.
157-
*
158-
* @returns OMPI_SUCCESS Always
159-
*
160-
* Invokes from ompi_mpi_finalize(); tears down the error code array.
161-
*/
162-
int ompi_mpi_errcode_finalize(void);
163-
164-
/**
165-
* Add an error code
166-
*
167-
* @param: error class to which this new error code belongs to
168-
*
169-
* @returns the new error code on SUCCESS (>0)
170-
* @returns OMPI_ERROR otherwise
171-
*
172-
*/
173-
int ompi_mpi_errcode_add (int errclass);
174-
175-
/**
176-
* Add an error class
177-
*
178-
* @param: none
179-
*
180-
* @returns the new error class on SUCCESS (>0)
181-
* @returns OMPI_ERROR otherwise
182-
*
183-
*/
184-
int ompi_mpi_errclass_add (void);
185-
186-
/**
187-
* Add an error string to an error code
188-
*
189-
* @param: error code for which the string is defined
190-
* @param: error string to add
191-
* @param: length of the string
192-
*
193-
* @returns OMPI_SUCCESS on success
194-
* @returns OMPI_ERROR on error
195-
*/
196-
int ompi_mpi_errnum_add_string (int errnum, const char* string, int len);
197-
198218
END_C_DECLS
199219

200220
#endif /* OMPI_MPI_ERRCODE_H */

0 commit comments

Comments
 (0)