Skip to content

Commit 6fea889

Browse files
committed
util: Fix write handling in multiple functions
Fix both a real (but unlikely) error and a compiler warning, each caused by ignoring the return code of write(). Recent versions of Glibc tag the write() function so that the compiler raises a warning if the return value is ignored. The underlying issue is real (a write can fail or be short), so do a slightly better job of handling it, which also silences the warning. Signed-off-by: Brian Barrett <bbarrett@amazon.com>
1 parent b3a9ad3 commit 6fea889

File tree

7 files changed

+71
-18
lines changed

7 files changed

+71
-18
lines changed

ompi/mca/sharedfp/sm/sharedfp_sm_file_open.c

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@
3535
#include "sharedfp_sm.h"
3636

3737
#include "mpi.h"
38+
#include "opal/util/output.h"
3839
#include "ompi/constants.h"
3940
#include "ompi/group/group.h"
4041
#include "ompi/proc/proc.h"
@@ -139,7 +140,10 @@ int mca_sharedfp_sm_file_open (struct ompi_communicator_t *comm,
139140
/* TODO: is it necessary to write to the file first? */
140141
if( 0 == fh->f_rank ){
141142
memset ( &sm_offset, 0, sizeof (struct mca_sharedfp_sm_offset ));
142-
write ( sm_fd, &sm_offset, sizeof(struct mca_sharedfp_sm_offset));
143+
err = opal_best_effort_write ( sm_fd, &sm_offset, sizeof(struct mca_sharedfp_sm_offset));
144+
if (OPAL_SUCCESS != err) {
145+
return err;
146+
}
143147
}
144148
err = comm->c_coll->coll_barrier (comm, comm->c_coll->coll_barrier_module );
145149
if ( OMPI_SUCCESS != err ) {

opal/mca/backtrace/execinfo/backtrace_execinfo.c

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@
3030
#endif
3131

3232
#include "opal/constants.h"
33+
#include "opal/util/output.h"
3334
#include "opal/mca/backtrace/backtrace.h"
3435

3536
int opal_backtrace_print(FILE *file, char *prefix, int strip)
@@ -52,10 +53,10 @@ int opal_backtrace_print(FILE *file, char *prefix, int strip)
5253

5354
for (i = strip; i < trace_size; i++) {
5455
if (NULL != prefix) {
55-
write(fd, prefix, strlen(prefix));
56+
opal_best_effort_write(fd, prefix, strlen(prefix));
5657
}
5758
len = snprintf(buf, sizeof(buf), "[%2d] ", i - strip);
58-
write(fd, buf, len);
59+
opal_best_effort_write(fd, buf, len);
5960
backtrace_symbols_fd(&trace[i], 1, fd);
6061
}
6162

opal/mca/rcache/base/rcache_base_mem_cb.c

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@
3030
#endif
3131

3232
#include "opal/runtime/opal_params.h"
33+
#include "opal/util/output.h"
3334
#include "opal/util/proc.h"
3435
#include "opal/util/show_help.h"
3536

@@ -72,7 +73,7 @@ void mca_rcache_base_mem_cb(void *base, size_t size, void *cbdata, bool from_all
7273
"will now abort.\n",
7374
opal_process_info.nodename, getpid(), base, (unsigned long) size);
7475
msg[sizeof(msg) - 1] = '\0';
75-
write(2, msg, len);
76+
opal_best_effort_write(2, msg, len);
7677
} else {
7778
opal_show_help("help-rcache-base.txt", "cannot deregister in-use memory", true,
7879
current->rcache_component->rcache_version.mca_component_name,

opal/util/error.c

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@
3535
#include "opal/constants.h"
3636
#include "opal/runtime/opal_params.h"
3737
#include "opal/util/error.h"
38+
#include "opal/util/output.h"
3839
#include "opal/util/printf.h"
3940
#include "opal/util/proc.h"
4041
#include "opal/util/string_copy.h"
@@ -217,14 +218,18 @@ void opal_delay_abort(void)
217218
"[%s:%05d] Looping forever "
218219
"(MCA parameter opal_abort_delay is < 0)\n",
219220
opal_process_info.nodename, (int) pid);
220-
write(STDERR_FILENO, msg, strlen(msg));
221+
} else {
222+
snprintf(msg, sizeof(msg), "[%s:%05d] Delaying for %d seconds before aborting\n",
223+
opal_process_info.nodename, (int) pid, delay);
224+
}
225+
226+
opal_best_effort_write(STDERR_FILENO, msg, strlen(msg));
227+
228+
if (delay < 0) {
221229
while (1) {
222230
sleep(5);
223231
}
224232
} else {
225-
snprintf(msg, sizeof(msg), "[%s:%05d] Delaying for %d seconds before aborting\n",
226-
opal_process_info.nodename, (int) pid, delay);
227-
write(STDERR_FILENO, msg, strlen(msg));
228233
do {
229234
sleep(1);
230235
} while (--delay > 0);

opal/util/output.c

Lines changed: 18 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -948,14 +948,20 @@ static int output(int output_id, const char *format, va_list arglist)
948948

949949
/* stdout output */
950950
if (ldi->ldi_stdout) {
951-
write(fileno(stdout), out, (int) strlen(out));
951+
int tmp = opal_best_effort_write(fileno(stdout), out, strlen(out));
952+
if (OPAL_SUCCESS != tmp) {
953+
rc = tmp;
954+
}
952955
fflush(stdout);
953956
}
954957

955958
/* stderr output */
956959
if (ldi->ldi_stderr) {
957-
write((-1 == default_stderr_fd) ? fileno(stderr) : default_stderr_fd, out,
958-
(int) strlen(out));
960+
int tmp = opal_best_effort_write((-1 == default_stderr_fd) ? fileno(stderr) : default_stderr_fd,
961+
out, strlen(out));
962+
if (OPAL_SUCCESS != tmp) {
963+
rc = tmp;
964+
}
959965
fflush(stderr);
960966
}
961967

@@ -970,17 +976,24 @@ static int output(int output_id, const char *format, va_list arglist)
970976
++ldi->ldi_file_num_lines_lost;
971977
} else if (ldi->ldi_file_num_lines_lost > 0) {
972978
char buffer[BUFSIZ];
979+
int tmp;
973980
memset(buffer, 0, BUFSIZ);
974981
snprintf(buffer, BUFSIZ - 1,
975982
"[WARNING: %d lines lost because the Open MPI process session "
976983
"directory did\n not exist when opal_output() was invoked]\n",
977984
ldi->ldi_file_num_lines_lost);
978-
write(ldi->ldi_fd, buffer, (int) strlen(buffer));
985+
tmp = opal_best_effort_write(ldi->ldi_fd, buffer, strlen(buffer));
986+
if (OPAL_SUCCESS != tmp) {
987+
rc = tmp;
988+
}
979989
ldi->ldi_file_num_lines_lost = 0;
980990
}
981991
}
982992
if (ldi->ldi_fd != -1) {
983-
write(ldi->ldi_fd, out, (int) strlen(out));
993+
int tmp = opal_best_effort_write(ldi->ldi_fd, out, strlen(out));
994+
if (OPAL_SUCCESS != tmp) {
995+
rc = tmp;
996+
}
984997
}
985998
}
986999
OPAL_THREAD_UNLOCK(&mutex);

opal/util/output.h

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -71,7 +71,10 @@
7171
#include "opal_config.h"
7272

7373
#include <stdarg.h>
74+
#include <unistd.h>
75+
#include <errno.h>
7476

77+
#include "opal/constants.h"
7578
#include "opal/class/opal_object.h"
7679

7780
BEGIN_C_DECLS
@@ -552,6 +555,30 @@ OPAL_DECLSPEC void opal_output_set_output_file_info(const char *dir, const char
552555
*/
553556
OPAL_DECLSPEC OBJ_CLASS_DECLARATION(opal_output_stream_t);
554557

558+
/**
559+
* Best effort write for blocking file descriptors
560+
*
561+
* A wrapper around write() that will spin trying to write the entire
562+
* buffer until either an error occurs or a write is not able to write
563+
* any data. Useful for situations where opal_output cannot be used
564+
* for signal reasons. Do not use with non-blocking file descriptors.
565+
*/
566+
static inline int opal_best_effort_write(int fd, const void *buf, size_t count)
567+
{
568+
size_t written = 0;
569+
570+
while (written != count) {
571+
ssize_t tmp = write(fd, (char *)buf + written, count - written);
572+
/* we explicitly do not recover from errors in this wrapper */
573+
if ((tmp < 0 && errno != EINTR) || (tmp == 0)) {
574+
return OPAL_ERROR;
575+
}
576+
written += tmp;
577+
}
578+
579+
return OPAL_SUCCESS;
580+
}
581+
555582
END_C_DECLS
556583

557584
#endif /* OPAL_OUTPUT_H_ */

opal/util/stacktrace.c

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -146,7 +146,7 @@ static void show_stackframe(int signo, siginfo_t *info, void *p)
146146
memset(print_buffer, 0, sizeof(print_buffer));
147147
ret = snprintf(print_buffer, sizeof(print_buffer),
148148
HOSTFORMAT "*** Process received signal ***\n", stacktrace_hostname, getpid());
149-
write(opal_stacktrace_output_fileno, print_buffer, ret);
149+
opal_best_effort_write(opal_stacktrace_output_fileno, print_buffer, ret);
150150

151151
memset(print_buffer, 0, sizeof(print_buffer));
152152

@@ -467,23 +467,25 @@ static void show_stackframe(int signo, siginfo_t *info, void *p)
467467
}
468468

469469
/* write out the signal information generated above */
470-
write(opal_stacktrace_output_fileno, print_buffer, sizeof(print_buffer) - size);
470+
opal_best_effort_write(opal_stacktrace_output_fileno, print_buffer, sizeof(print_buffer) - size);
471471

472472
/* print out the stack trace */
473473
snprintf(print_buffer, sizeof(print_buffer), HOSTFORMAT, stacktrace_hostname, getpid());
474474
ret = opal_backtrace_print(NULL, print_buffer, 2);
475475
if (OPAL_SUCCESS != ret) {
476-
write(opal_stacktrace_output_fileno, unable_to_print_msg, strlen(unable_to_print_msg));
476+
opal_best_effort_write(opal_stacktrace_output_fileno, unable_to_print_msg,
477+
strlen(unable_to_print_msg));
477478
}
478479

479480
/* write out the footer information */
480481
memset(print_buffer, 0, sizeof(print_buffer));
481482
ret = snprintf(print_buffer, sizeof(print_buffer), HOSTFORMAT "*** End of error message ***\n",
482483
stacktrace_hostname, getpid());
483484
if (ret > 0) {
484-
write(opal_stacktrace_output_fileno, print_buffer, ret);
485+
opal_best_effort_write(opal_stacktrace_output_fileno, print_buffer, ret);
485486
} else {
486-
write(opal_stacktrace_output_fileno, unable_to_print_msg, strlen(unable_to_print_msg));
487+
opal_best_effort_write(opal_stacktrace_output_fileno, unable_to_print_msg,
488+
strlen(unable_to_print_msg));
487489
}
488490

489491
if (fileno(stdout) != opal_stacktrace_output_fileno

0 commit comments

Comments
 (0)