Skip to content

Commit 27b2ec7

Browse files
committed
common/ompio: add support for read operations and collective I/O
external32 data representation is now supported by ompio for everything but non-blocking collective I/O operations. The support can be further improved in a second step to limit the temporary buffer size (at least for blocking operations), but it already works for many scenarios. Signed-off-by: Edgar Gabriel <egabriel@central.uh.edu>
1 parent ab56e6f commit 27b2ec7

File tree

2 files changed

+96
-13
lines changed

2 files changed

+96
-13
lines changed

ompi/mca/common/ompio/common_ompio_file_read.c

Lines changed: 46 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -401,11 +401,52 @@ int mca_common_ompio_file_read_all (ompio_file_t *fh,
401401
ompi_status_public_t * status)
402402
{
403403
int ret = OMPI_SUCCESS;
404-
ret = fh->f_fcoll->fcoll_file_read_all (fh,
405-
buf,
406-
count,
407-
datatype,
408-
status);
404+
405+
406+
if ( !( fh->f_flags & OMPIO_DATAREP_NATIVE ) &&
407+
!(datatype == &ompi_mpi_byte.dt ||
408+
datatype == &ompi_mpi_char.dt )) {
409+
/* No need to check for GPU buffer for collective I/O.
410+
Most algorithms copy data from aggregators, and send/recv
411+
to/from GPU buffers works if ompi was compiled with GPU support.
412+
413+
If the individual fcoll component is used: there are no aggregators
414+
in that concept. However, since they call common_ompio_file_write,
415+
CUDA buffers are handled by that routine.
416+
417+
Thus, we only check for
418+
1. Datarepresentation is anything other than 'native' and
419+
2. datatype is not byte or char (i.e it does require some actual
420+
work to be done, e.g. for external32).
421+
*/
422+
size_t pos=0, max_data=0;
423+
char *tbuf=NULL;
424+
opal_convertor_t convertor;
425+
struct iovec *decoded_iov = NULL;
426+
uint32_t iov_count = 0;
427+
428+
OMPIO_PREPARE_READ_BUF(fh,buf,count,datatype,tbuf,&convertor,max_data,decoded_iov,iov_count);
429+
ret = fh->f_fcoll->fcoll_file_read_all (fh,
430+
decoded_iov->iov_base,
431+
decoded_iov->iov_len,
432+
MPI_BYTE,
433+
status);
434+
opal_convertor_unpack (&convertor, decoded_iov, &iov_count, &pos );
435+
436+
opal_convertor_cleanup (&convertor);
437+
mca_common_ompio_release_buf (fh, decoded_iov->iov_base);
438+
if (NULL != decoded_iov) {
439+
free (decoded_iov);
440+
decoded_iov = NULL;
441+
}
442+
}
443+
else {
444+
ret = fh->f_fcoll->fcoll_file_read_all (fh,
445+
buf,
446+
count,
447+
datatype,
448+
status);
449+
}
409450
return ret;
410451
}
411452

ompi/mca/common/ompio/common_ompio_file_write.c

Lines changed: 50 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -159,11 +159,11 @@ int mca_common_ompio_file_write (ompio_file_t *fh,
159159
fh->f_io_array = NULL;
160160
}
161161
}
162-
#if OPAL_CUDA_SUPPORT
163-
if ( is_gpu && !is_managed ) {
162+
163+
if ( need_to_copy ) {
164164
mca_common_ompio_release_buf (fh, decoded_iov->iov_base);
165165
}
166-
#endif
166+
167167

168168
if (NULL != decoded_iov) {
169169
free (decoded_iov);
@@ -378,11 +378,53 @@ int mca_common_ompio_file_write_all (ompio_file_t *fh,
378378
ompi_status_public_t *status)
379379
{
380380
int ret = OMPI_SUCCESS;
381-
ret = fh->f_fcoll->fcoll_file_write_all (fh,
382-
buf,
383-
count,
384-
datatype,
385-
status);
381+
382+
if ( !( fh->f_flags & OMPIO_DATAREP_NATIVE ) &&
383+
!(datatype == &ompi_mpi_byte.dt ||
384+
datatype == &ompi_mpi_char.dt )) {
385+
/* No need to check for GPU buffer for collective I/O.
386+
Most algorithms first copy data to aggregators, and send/recv
387+
to/from GPU buffers works if ompi was compiled with GPU support.
388+
389+
If the individual fcoll component is used: there are no aggregators
390+
in that concept. However, since they call common_ompio_file_write,
391+
CUDA buffers are handled by that routine.
392+
393+
Thus, we only check for
394+
1. Datarepresentation is anything other than 'native' and
395+
2. datatype is not byte or char (i.e it does require some actual
396+
work to be done, e.g. for external32).
397+
*/
398+
size_t pos=0, max_data=0;
399+
char *tbuf=NULL;
400+
opal_convertor_t convertor;
401+
struct iovec *decoded_iov = NULL;
402+
uint32_t iov_count = 0;
403+
404+
OMPIO_PREPARE_BUF(fh,buf,count,datatype,tbuf,&convertor,max_data,decoded_iov,iov_count);
405+
opal_convertor_pack (&convertor, decoded_iov, &iov_count, &pos );
406+
opal_convertor_cleanup ( &convertor);
407+
408+
ret = fh->f_fcoll->fcoll_file_write_all (fh,
409+
decoded_iov->iov_base,
410+
decoded_iov->iov_len,
411+
MPI_BYTE,
412+
status);
413+
414+
415+
mca_common_ompio_release_buf (fh, decoded_iov->iov_base);
416+
if (NULL != decoded_iov) {
417+
free (decoded_iov);
418+
decoded_iov = NULL;
419+
}
420+
}
421+
else {
422+
ret = fh->f_fcoll->fcoll_file_write_all (fh,
423+
buf,
424+
count,
425+
datatype,
426+
status);
427+
}
386428
return ret;
387429
}
388430

0 commit comments

Comments
 (0)