@@ -58,12 +58,13 @@ static mca_fbtl_base_module_1_0_0_t posix = {
58
58
#if defined (FBTL_POSIX_HAVE_AIO )
59
59
mca_fbtl_posix_ipwritev , /* non-blocking write */
60
60
mca_fbtl_posix_progress , /* module specific progress */
61
- mca_fbtl_posix_request_free /* free module specific data items on the request */
61
+ mca_fbtl_posix_request_free , /* free module specific data items on the request */
62
62
#else
63
63
NULL , /* non-blocking write */
64
64
NULL , /* module specific progress */
65
- NULL /* free module specific data items on the request */
65
+ NULL , /* free module specific data items on the request */
66
66
#endif
67
+ mca_fbtl_posix_check_atomicity /* check whether atomicity is supported on this fs */
67
68
};
68
69
/*
69
70
* *******************************************************************
@@ -144,34 +145,38 @@ bool mca_fbtl_posix_progress ( mca_ompio_request_t *req)
144
145
data -> aio_req_status [i ] = EINPROGRESS ;
145
146
start_offset = data -> aio_reqs [i ].aio_offset ;
146
147
total_length = data -> aio_reqs [i ].aio_nbytes ;
148
+ /* release previous lock */
149
+ mca_fbtl_posix_unlock ( & data -> aio_lock , data -> aio_fh , & data -> aio_lock_counter );
150
+
147
151
if ( data -> aio_req_type == FBTL_POSIX_WRITE ) {
148
- ret_code = mca_fbtl_posix_lock ( & data -> aio_lock , data -> aio_fh , F_WRLCK , start_offset , total_length , OMPIO_LOCK_ENTIRE_REGION );
152
+ ret_code = mca_fbtl_posix_lock ( & data -> aio_lock , data -> aio_fh , F_WRLCK , start_offset , total_length ,
153
+ OMPIO_LOCK_ENTIRE_REGION , & data -> aio_lock_counter );
149
154
if ( 0 < ret_code ) {
150
155
opal_output (1 , "mca_fbtl_posix_progress: error in mca_fbtl_posix_lock() %d" , ret_code );
151
156
/* Just in case some part of the lock actually succeeded. */
152
- mca_fbtl_posix_unlock ( & data -> aio_lock , data -> aio_fh );
157
+ mca_fbtl_posix_unlock ( & data -> aio_lock , data -> aio_fh , & data -> aio_lock_counter );
153
158
return OMPI_ERROR ;
154
159
}
155
160
if (-1 == aio_write (& data -> aio_reqs [i ])) {
156
161
opal_output (1 , "mca_fbtl_posix_progress: error in aio_write()" );
157
- mca_fbtl_posix_unlock ( & data -> aio_lock , data -> aio_fh );
162
+ mca_fbtl_posix_unlock ( & data -> aio_lock , data -> aio_fh , & data -> aio_lock_counter );
158
163
return OMPI_ERROR ;
159
164
}
160
165
}
161
166
else if ( data -> aio_req_type == FBTL_POSIX_READ ) {
162
- ret_code = mca_fbtl_posix_lock ( & data -> aio_lock , data -> aio_fh , F_RDLCK , start_offset , total_length , OMPIO_LOCK_ENTIRE_REGION );
167
+ ret_code = mca_fbtl_posix_lock ( & data -> aio_lock , data -> aio_fh , F_RDLCK , start_offset , total_length ,
168
+ OMPIO_LOCK_ENTIRE_REGION , & data -> aio_lock_counter );
163
169
if ( 0 < ret_code ) {
164
170
opal_output (1 , "mca_fbtl_posix_progress: error in mca_fbtl_posix_lock() %d" , ret_code );
165
171
/* Just in case some part of the lock actually succeeded. */
166
- mca_fbtl_posix_unlock ( & data -> aio_lock , data -> aio_fh );
172
+ mca_fbtl_posix_unlock ( & data -> aio_lock , data -> aio_fh , & data -> aio_lock_counter );
167
173
return OMPI_ERROR ;
168
174
}
169
175
if (-1 == aio_read (& data -> aio_reqs [i ])) {
170
176
opal_output (1 , "mca_fbtl_posix_progress: error in aio_read()" );
171
- mca_fbtl_posix_unlock ( & data -> aio_lock , data -> aio_fh );
177
+ mca_fbtl_posix_unlock ( & data -> aio_lock , data -> aio_fh , & data -> aio_lock_counter );
172
178
return OMPI_ERROR ;
173
179
}
174
- mca_fbtl_posix_unlock ( & data -> aio_lock , data -> aio_fh );
175
180
}
176
181
}
177
182
else {
@@ -199,10 +204,9 @@ bool mca_fbtl_posix_progress ( mca_ompio_request_t *req)
199
204
#if 0
200
205
printf ("lcount=%d open_reqs=%d\n" , lcount , data -> aio_open_reqs );
201
206
#endif
202
-
203
207
if ( (lcount == data -> aio_req_chunks ) && (0 != data -> aio_open_reqs )) {
204
208
/* release the lock of the previous operations */
205
- mca_fbtl_posix_unlock ( & data -> aio_lock , data -> aio_fh );
209
+ mca_fbtl_posix_unlock ( & data -> aio_lock , data -> aio_fh , & data -> aio_lock_counter );
206
210
207
211
/* post the next batch of operations */
208
212
data -> aio_first_active_req = data -> aio_last_active_req ;
@@ -218,30 +222,32 @@ bool mca_fbtl_posix_progress ( mca_ompio_request_t *req)
218
222
total_length = (end_offset - start_offset );
219
223
220
224
if ( FBTL_POSIX_READ == data -> aio_req_type ) {
221
- ret_code = mca_fbtl_posix_lock ( & data -> aio_lock , data -> aio_fh , F_RDLCK , start_offset , total_length , OMPIO_LOCK_ENTIRE_REGION );
225
+ ret_code = mca_fbtl_posix_lock ( & data -> aio_lock , data -> aio_fh , F_RDLCK , start_offset , total_length ,
226
+ OMPIO_LOCK_ENTIRE_REGION , & data -> aio_lock_counter );
222
227
}
223
228
else if ( FBTL_POSIX_WRITE == data -> aio_req_type ) {
224
- ret_code = mca_fbtl_posix_lock ( & data -> aio_lock , data -> aio_fh , F_WRLCK , start_offset , total_length , OMPIO_LOCK_ENTIRE_REGION );
229
+ ret_code = mca_fbtl_posix_lock ( & data -> aio_lock , data -> aio_fh , F_WRLCK , start_offset , total_length ,
230
+ OMPIO_LOCK_ENTIRE_REGION , & data -> aio_lock_counter );
225
231
}
226
232
if ( 0 < ret_code ) {
227
233
opal_output (1 , "mca_fbtl_posix_progress: error in mca_fbtl_posix_lock() %d" , ret_code );
228
234
/* Just in case some part of the lock actually succeeded. */
229
- mca_fbtl_posix_unlock ( & data -> aio_lock , data -> aio_fh );
235
+ mca_fbtl_posix_unlock ( & data -> aio_lock , data -> aio_fh , & data -> aio_lock_counter );
230
236
return OMPI_ERROR ;
231
237
}
232
238
233
239
for ( i = data -> aio_first_active_req ; i < data -> aio_last_active_req ; i ++ ) {
234
240
if ( FBTL_POSIX_READ == data -> aio_req_type ) {
235
241
if (-1 == aio_read (& data -> aio_reqs [i ])) {
236
242
opal_output (1 , "mca_fbtl_posix_progress: error in aio_read()" );
237
- mca_fbtl_posix_unlock ( & data -> aio_lock , data -> aio_fh );
243
+ mca_fbtl_posix_unlock ( & data -> aio_lock , data -> aio_fh , & data -> aio_lock_counter );
238
244
return OMPI_ERROR ;
239
245
}
240
246
}
241
247
else if ( FBTL_POSIX_WRITE == data -> aio_req_type ) {
242
248
if (-1 == aio_write (& data -> aio_reqs [i ])) {
243
249
opal_output (1 , "mca_fbtl_posix_progress: error in aio_write()" );
244
- mca_fbtl_posix_unlock ( & data -> aio_lock , data -> aio_fh );
250
+ mca_fbtl_posix_unlock ( & data -> aio_lock , data -> aio_fh , & data -> aio_lock_counter );
245
251
return OMPI_ERROR ;
246
252
}
247
253
}
@@ -255,8 +261,13 @@ bool mca_fbtl_posix_progress ( mca_ompio_request_t *req)
255
261
/* all pending operations are finished for this request */
256
262
req -> req_ompi .req_status .MPI_ERROR = OMPI_SUCCESS ;
257
263
req -> req_ompi .req_status ._ucount = data -> aio_total_len ;
258
- mca_fbtl_posix_unlock ( & data -> aio_lock , data -> aio_fh );
259
- ret = true;
264
+ mca_fbtl_posix_unlock ( & data -> aio_lock , data -> aio_fh , & data -> aio_lock_counter );
265
+
266
+ if ( data -> aio_fh -> f_atomicity ) {
267
+ mca_fbtl_posix_unlock ( & data -> aio_lock , data -> aio_fh , & data -> aio_lock_counter );
268
+ }
269
+
270
+ ret = true;
260
271
}
261
272
#endif
262
273
return ret ;
@@ -268,8 +279,8 @@ void mca_fbtl_posix_request_free ( mca_ompio_request_t *req)
268
279
/* Free the fbtl specific data structures */
269
280
mca_fbtl_posix_request_data_t * data = (mca_fbtl_posix_request_data_t * )req -> req_data ;
270
281
if (NULL != data ) {
271
- mca_fbtl_posix_unlock ( & data -> aio_lock , data -> aio_fh );
272
- if ( NULL != data -> aio_reqs ) {
282
+
283
+ if ( NULL != data -> aio_reqs ) {
273
284
free ( data -> aio_reqs );
274
285
}
275
286
if ( NULL != data -> aio_req_status ) {
@@ -281,3 +292,27 @@ void mca_fbtl_posix_request_free ( mca_ompio_request_t *req)
281
292
#endif
282
293
return ;
283
294
}
295
+
296
+ bool mca_fbtl_posix_check_atomicity ( ompio_file_t * file )
297
+ {
298
+ struct flock lock ;
299
+
300
+ lock .l_type = F_WRLCK ;
301
+ lock .l_whence = SEEK_SET ;
302
+ lock .l_start = 0 ;
303
+ lock .l_len = 0 ;
304
+ lock .l_pid = 0 ;
305
+
306
+ if (fcntl (file -> fd , F_GETLK , & lock ) < 0 )
307
+ {
308
+ #ifdef VERBOSE
309
+ printf ("Failed to get lock info for '%s': %s\n" , filename , strerror (errno ));
310
+ #endif
311
+ return false;
312
+ }
313
+
314
+ #ifdef VERBOSE
315
+ printf ("Lock would have worked, l_type=%d\n" , (int )lock .l_type );
316
+ #endif
317
+ return true;
318
+ }
0 commit comments