Skip to content

Commit 893270c

Browse files
committed
orte: send error messages to stderr.
When a job terminates normally but with a non-zero exit code, display the error message to stderr. Thanks to Emre Brookes for the bug report. Signed-off-by: Gilles Gouaillardet <gilles@rist.or.jp>
1 parent b83ad04 commit 893270c

File tree

5 files changed

+33
-12
lines changed

5 files changed

+33
-12
lines changed

orte/mca/errmgr/base/help-errmgr-base.txt

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,8 @@
1212
# All rights reserved.
1313
# Copyright (c) 2014-2017 Intel, Inc. All rights reserved.
1414
# Copyright (c) 2017 IBM Corporation. All rights reserved.
15+
# Copyright (c) 2018 Research Organization for Information Science
16+
# and Technology (RIST). All rights reserved.
1517
# $COPYRIGHT$
1618
#
1719
# Additional copyrights may follow
@@ -105,3 +107,6 @@ An internal error has occurred in ORTE:
105107
%s
106108

107109
This is something that should be reported to the developers.
110+
#
111+
[normal-termination-but]
112+
%s job %s terminated normally, but %d %s. Per user-direction, the job has been aborted.

orte/mca/errmgr/default_hnp/errmgr_default_hnp.c

Lines changed: 7 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,8 @@
1111
* All rights reserved.
1212
* Copyright (c) 2014-2018 Intel, Inc. All rights reserved.
1313
* Copyright (c) 2017 IBM Corporation. All rights reserved.
14+
* Copyright (c) 2018 Research Organization for Information Science
15+
* and Technology (RIST). All rights reserved.
1416
* $COPYRIGHT$
1517
*
1618
* Additional copyrights may follow
@@ -839,14 +841,11 @@ static void default_hnp_abort(orte_job_t *jdata)
839841
i32ptr = &i32;
840842
if (orte_get_attribute(&jdata->attributes, ORTE_JOB_NUM_NONZERO_EXIT, (void**)&i32ptr, OPAL_INT32)) {
841843
/* warn user */
842-
opal_output(orte_clean_output,
843-
"-------------------------------------------------------\n"
844-
"%s job %s terminated normally, but %d %s. Per user-direction, the job has been aborted.\n"
845-
"-------------------------------------------------------",
846-
(1 == ORTE_LOCAL_JOBID(jdata->jobid)) ? "Primary" : "Child",
847-
(1 == ORTE_LOCAL_JOBID(jdata->jobid)) ? "" : ORTE_LOCAL_JOBID_PRINT(jdata->jobid),
848-
i32, (1 == i32) ? "process returned\na non-zero exit code" :
849-
"processes returned\nnon-zero exit codes");
844+
orte_show_help("help-errmgr-base.txt", "normal-termination-but", true,
845+
(1 == ORTE_LOCAL_JOBID(jdata->jobid)) ? "Primary" : "Child",
846+
(1 == ORTE_LOCAL_JOBID(jdata->jobid)) ? "" : ORTE_LOCAL_JOBID_PRINT(jdata->jobid),
847+
i32, (1 == i32) ? "process returned\na non-zero exit code" :
848+
"processes returned\nnon-zero exit codes");
850849
}
851850

852851
OPAL_OUTPUT_VERBOSE((1, orte_errmgr_base_framework.framework_output,

orte/mca/state/base/Makefile.am

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,17 @@
11
#
22
# Copyright (c) 2011-2013 Los Alamos National Security, LLC.
33
# All rights reserved.
4+
# Copyright (c) 2018 Research Organization for Information Science
5+
# and Technology (RIST). All rights reserved.
46
# $COPYRIGHT$
57
#
68
# Additional copyrights may follow
79
#
810
# $HEADER$
911
#
1012

13+
dist_ortedata_DATA += base/help-state-base.txt
14+
1115
headers += \
1216
base/state_private.h \
1317
base/base.h
orte/mca/state/base/help-state-base.txt

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
# -*- text -*-
2+
#
3+
# Copyright (c) 2018 Research Organization for Information Science
4+
# and Technology (RIST). All rights reserved.
5+
# $COPYRIGHT$
6+
#
7+
# Additional copyrights may follow
8+
#
9+
# $HEADER$
10+
#
11+
#
12+
[normal-termination-but]
13+
While %s job %s terminated normally, %d %s. Further examination may be required.

orte/mca/state/base/state_base_fns.c

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
/*
22
* Copyright (c) 2011-2012 Los Alamos National Security, LLC.
33
* Copyright (c) 2014-2018 Intel, Inc. All rights reserved.
4+
* Copyright (c) 2018 Research Organization for Information Science
5+
* and Technology (RIST). All rights reserved.
46
* $COPYRIGHT$
57
*
68
* Additional copyrights may follow
@@ -38,6 +40,7 @@
3840
#include "orte/mca/routed/routed.h"
3941
#include "orte/util/session_dir.h"
4042
#include "orte/util/threads.h"
43+
#include "orte/util/show_help.h"
4144

4245
#include "orte/mca/state/base/base.h"
4346
#include "orte/mca/state/base/state_private.h"
@@ -847,10 +850,7 @@ void orte_state_base_check_all_complete(int fd, short args, void *cbdata)
847850
}
848851

849852
/* warn user */
850-
opal_output(orte_clean_output,
851-
"-------------------------------------------------------\n"
852-
"While %s job %s terminated normally, %d %s. Further examination may be required.\n"
853-
"-------------------------------------------------------",
853+
orte_show_help("help-state-base.txt", "normal-termination-but", true,
854854
(1 == ORTE_LOCAL_JOBID(jdata->jobid)) ? "the primary" : "child",
855855
(1 == ORTE_LOCAL_JOBID(jdata->jobid)) ? "" : ORTE_LOCAL_JOBID_PRINT(jdata->jobid),
856856
i32, (1 == i32) ? "process returned\na non-zero exit code." :

0 commit comments

Comments
 (0)