@@ -48,6 +48,7 @@ static const char *dash_line
48
48
= "--------------------------------------------------------------------------\n" ;
49
49
/* Handle of the opal_output stream used for help text.  It holds -1 until
 * opal_show_help_init() stores the value returned by opal_output_open(). */
static int output_stream = -1;

/* List of directories; no use of it is visible in this part of the file
 * (presumably the help-file search path -- TODO confirm). */
static char **search_dirs = NULL;

/* Storage behind the "help_aggregate" MCA variable registered in
 * opal_show_help_init().  When true, local_delivery() attaches the
 * aggregation directives to its PMIx log request. */
static bool opal_help_want_aggregate = true;
51
52
52
53
/*
53
54
* Local functions
@@ -58,13 +59,28 @@ static int opal_show_help_internal(const char *filename, const char *topic, int
58
59
...);
59
60
static void opal_show_help_finalize (void );
60
61
62
+ typedef struct {
63
+ pmix_info_t * info ;
64
+ pmix_info_t * dirs ;
65
+ char * msg ;
66
+ } opal_log_info_t ;
67
+
61
68
opal_show_help_fn_t opal_show_help = opal_show_help_internal ;
62
69
opal_show_vhelp_fn_t opal_show_vhelp = opal_show_vhelp_internal ;
63
70
64
71
int opal_show_help_init (void )
65
72
{
66
73
opal_output_stream_t lds ;
67
74
75
+ opal_help_want_aggregate = true;
76
+ mca_base_var_register ("opal" , NULL , "base" , "help_aggregate" ,
77
+ "If opal_base_help_aggregate is true, duplicate help messages will be aggregated rather "
78
+ "than displayed individually. This can be helpful for parallel jobs that experience "
79
+ "multiple identical failures; rather than print out the same help/failure message N times, "
80
+ "display it once with a count of how many processes sent the same message. Default: true." ,
81
+ MCA_BASE_VAR_TYPE_BOOL , NULL , 0 , 0 , OPAL_INFO_LVL_9 ,
82
+ MCA_BASE_VAR_SCOPE_LOCAL , & opal_help_want_aggregate );
83
+
68
84
OBJ_CONSTRUCT (& lds , opal_output_stream_t );
69
85
lds .lds_want_stderr = true;
70
86
output_stream = opal_output_open (& lds );
@@ -88,6 +104,58 @@ static void opal_show_help_finalize(void)
88
104
}
89
105
}
90
106
107
+ static void opal_show_help_cbfunc (pmix_status_t status , void * cbdata )
108
+ {
109
+ opal_log_info_t * info = (opal_log_info_t * ) cbdata ;
110
+ if (PMIX_SUCCESS != status && PMIX_OPERATION_SUCCEEDED != status ) {
111
+ // Aggregation/de-duplication functionality is *probably* lost,
112
+ // but let's print the error anyway since duplicate error messages
113
+ // is better than hiding it.
114
+ opal_output (output_stream , "%s" , info -> msg );
115
+ }
116
+ PMIX_INFO_DESTRUCT (info -> info );
117
+ if (info -> dirs ) {
118
+ PMIX_INFO_DESTRUCT (info -> dirs );
119
+ }
120
+ free (info -> msg );
121
+ free (info );
122
+ }
123
+
124
+ static void local_delivery (const char * file , const char * topic , char * msg ) {
125
+ pmix_info_t * info , * dirs ;
126
+ int ninfo = 0 , ndirs = 0 ;
127
+ PMIX_INFO_CREATE (info , 1 );
128
+ PMIX_INFO_LOAD (& info [ninfo ++ ], PMIX_LOG_STDERR , msg , PMIX_STRING );
129
+
130
+ opal_log_info_t * cbdata = calloc (1 , sizeof (opal_log_info_t ));
131
+ if (opal_help_want_aggregate ) {
132
+ PMIX_INFO_CREATE (dirs , 3 );
133
+ PMIX_INFO_LOAD (& dirs [ndirs ++ ], PMIX_LOG_AGG , & opal_help_want_aggregate , PMIX_BOOL );
134
+ PMIX_INFO_LOAD (& dirs [ndirs ++ ], PMIX_LOG_KEY , file , PMIX_STRING );
135
+ PMIX_INFO_LOAD (& dirs [ndirs ++ ], PMIX_LOG_VAL , topic , PMIX_STRING );
136
+ cbdata -> dirs = dirs ;
137
+ }
138
+
139
+ cbdata -> info = info ;
140
+ cbdata -> msg = msg ;
141
+
142
+ // PMIx and the runtime will aggregate, de-duplicate, and print this
143
+ // message to stderr.
144
+ pmix_status_t rc = PMIx_Log_nb (info , ninfo , dirs , ndirs , opal_show_help_cbfunc , cbdata );
145
+ if (PMIX_SUCCESS != rc ) {
146
+ // Aggregation/de-duplication functionality is *definitely* lost,
147
+ // but let's print the error anyway since duplicate error messages
148
+ // is better than hiding it.
149
+ opal_output (output_stream , "%s" , msg );
150
+ PMIX_INFO_DESTRUCT (info );
151
+ if (opal_help_want_aggregate ) {
152
+ PMIX_INFO_DESTRUCT (dirs );
153
+ }
154
+ free (msg );
155
+ free (cbdata );
156
+ }
157
+ }
158
+
91
159
/*
92
160
* Make one big string with all the lines. This isn't the most
93
161
* efficient method in the world, but we're going for clarity here --
@@ -180,10 +248,12 @@ static int open_file(const char *base, const char *topic)
180
248
181
249
/* If we still couldn't open it, then something is wrong */
182
250
if (NULL == opal_show_help_yyin ) {
183
- opal_output (output_stream ,
251
+ char * tmp ;
252
+ opal_asprintf (& tmp ,
184
253
"%sSorry! You were supposed to get help about:\n %s\nBut I couldn't open "
185
254
"the help file:\n %s. Sorry!\n%s" ,
186
255
dash_line , topic , err_msg , dash_line );
256
+ local_delivery (topic , err_msg , tmp );
187
257
free (err_msg );
188
258
return OPAL_ERR_NOT_FOUND ;
189
259
}
@@ -231,14 +301,15 @@ static int find_topic(const char *base, const char *topic)
231
301
case OPAL_SHOW_HELP_PARSE_MESSAGE :
232
302
break ;
233
303
234
- case OPAL_SHOW_HELP_PARSE_DONE :
235
- opal_output (output_stream ,
304
+ case OPAL_SHOW_HELP_PARSE_DONE : {
305
+ char * msg ;
306
+ opal_asprintf (& msg ,
236
307
"%sSorry! You were supposed to get help about:\n %s\nfrom the file:\n "
237
308
" %s\nBut I couldn't find that topic in the file. Sorry!\n%s" ,
238
309
dash_line , topic , base , dash_line );
310
+ local_delivery (topic , base , msg );
239
311
return OPAL_ERR_NOT_FOUND ;
240
- break ;
241
-
312
+ }
242
313
default :
243
314
break ;
244
315
}
@@ -344,8 +415,7 @@ static int opal_show_vhelp_internal(const char *filename, const char *topic, int
344
415
345
416
/* If we got a single string, output it with formatting */
346
417
if (NULL != output ) {
347
- opal_output (output_stream , "%s" , output );
348
- free (output );
418
+ local_delivery (filename , topic , output );
349
419
}
350
420
351
421
return (NULL == output ) ? OPAL_ERROR : OPAL_SUCCESS ;
0 commit comments