@@ -290,6 +290,23 @@ ompi_errhandler_t *ompi_errhandler_create(ompi_errhandler_type_t object_type,
290
290
return new_errhandler ;
291
291
}
292
292
293
+ /* helper to move the error report back from the RTE thread to the MPI thread */
294
+ typedef struct ompi_errhandler_event_s {
295
+ opal_event_t super ;
296
+ opal_process_name_t procname ;
297
+ int status ;
298
+ } ompi_errhandler_event_t ;
299
+
300
+ static void * ompi_errhandler_event_cb (int fd , int flags , void * context ) {
301
+ ompi_errhandler_event_t * event = (ompi_errhandler_event_t * ) context ;
302
+ int status = event -> status ;
303
+ opal_event_del (& event -> super );
304
+ free (event );
305
+ /* our default action is to abort */
306
+ OMPI_ERRHANDLER_NOHANDLE_INVOKE (status , "PMIx Event notification" );
307
+ return NULL ;
308
+ }
309
+
293
310
/* registration callback */
294
311
void ompi_errhandler_registration_callback (int status ,
295
312
size_t errhandler_ref ,
@@ -312,13 +329,37 @@ void ompi_errhandler_callback(size_t refid, pmix_status_t status,
312
329
pmix_event_notification_cbfunc_fn_t cbfunc ,
313
330
void * cbdata )
314
331
{
332
+ int rc ;
333
+ /* an error has been found, report to the MPI layer and let it take
334
+ * further action. */
335
+ /* transition this from the RTE thread to the MPI progress engine */
336
+ ompi_errhandler_event_t * event = malloc (sizeof (* event ));
337
+ if (NULL == event ) {
338
+ OMPI_ERROR_LOG (OMPI_ERR_OUT_OF_RESOURCE );
339
+ goto error ;
340
+ }
341
+ OPAL_PMIX_CONVERT_PROCT (rc , & event -> procname , (pmix_proc_t * )source );
342
+ if (OPAL_UNLIKELY (OPAL_SUCCESS != rc )) {
343
+ OMPI_ERROR_LOG (rc );
344
+ free (event );
345
+ goto error ;
346
+ }
347
+ event -> status = status ;
348
+ opal_event_set (opal_sync_event_base , & event -> super , -1 , OPAL_EV_READ ,
349
+ ompi_errhandler_event_cb , event );
350
+ opal_event_active (& event -> super , OPAL_EV_READ , 1 );
315
351
/* tell the event chain engine to go no further - we
316
352
* will handle this */
317
353
if (NULL != cbfunc ) {
318
354
cbfunc (PMIX_EVENT_ACTION_COMPLETE , NULL , 0 , NULL , NULL , cbdata );
319
355
}
320
- /* our default action is to abort */
321
- OMPI_ERRHANDLER_NOHANDLE_INVOKE (status , "PMIx Event notification" );
356
+ return ;
357
+
358
+ error :
359
+ if (NULL != cbfunc ) {
360
+ /* We can't handle this, let the default action abort. */
361
+ cbfunc (PMIX_EVENT_NO_ACTION_TAKEN , NULL , 0 , NULL , NULL , cbdata );
362
+ }
322
363
}
323
364
324
365
/**************************************************************************
0 commit comments