File tree Expand file tree Collapse file tree 1 file changed +24
-5
lines changed
opal/mca/event/libevent2022/libevent Expand file tree Collapse file tree 1 file changed +24
-5
lines changed Original file line number Diff line number Diff line change 42
42
#include <string.h>
43
43
#include <unistd.h>
44
44
#include <errno.h>
45
+ #include <fcntl.h>
45
46
46
47
#include "event-internal.h"
47
48
#include "evsignal-internal.h"
@@ -166,12 +167,30 @@ select_dispatch(struct event_base *base, struct timeval *tv)
166
167
check_selectop (sop );
167
168
168
169
if (res == -1 ) {
169
- if (errno != EINTR ) {
170
- event_warn ("select" );
171
- return (-1 );
170
+ if (errno == EINTR ) {
171
+ return (0 );
172
172
}
173
-
174
- return (0 );
173
+ /* There seems to be a very subtle race condition between the
174
+ * event_del and the select, where the fd is still active on the
175
+ * event_readset_in but no libevent structure makes reference
176
+ * to it. Thus, any call to progress will do nothing more
177
+ * than print a warning, leading to deadlocks.
178
+ * If we force remove the problematic fd, we get the warning only
179
+ * once, and things work as expected.
180
+ */
181
+ event_warn ("select" );
182
+ for (j = 0 ; j < nfds ; ++ j ) {
183
+ if (FD_ISSET (j , sop -> event_readset_in ) ||
184
+ FD_ISSET (j , sop -> event_writeset_in )) {
185
+ res = fcntl (j , F_GETFL );
186
+ if ( res == -1 ) {
187
+ event_warn ("bad file descriptor %d/%d\n" , j , nfds );
188
+ FD_CLR (j , sop -> event_readset_in );
189
+ FD_CLR (j , sop -> event_writeset_in );
190
+ }
191
+ }
192
+ }
193
+ return (-1 );
175
194
}
176
195
177
196
event_debug (("%s: select reports %d" , __func__ , res ));
You can’t perform that action at this time.
0 commit comments