Skip to content

Commit f6b3a0a

Browse files
committed
common/ompio: individual write of external32 works
both blocking and non-blocking. Collective write and read operations do not work yet. Signed-off-by: Edgar Gabriel <egabriel@central.uh.edu>
1 parent d955753 commit f6b3a0a

File tree

5 files changed

+60
-32
lines changed

5 files changed

+60
-32
lines changed

ompi/mca/common/ompio/common_ompio.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -66,7 +66,7 @@
6666
#define OMPIO_LOCK_ENTIRE_FILE 0x00000080
6767
#define OMPIO_LOCK_NEVER 0x00000100
6868
#define OMPIO_LOCK_NOT_THIS_OP 0x00000200
69-
69+
#define OMPIO_DATAREP_NATIVE 0x00000400
7070

7171
#define OMPIO_ROOT 0
7272

ompi/mca/common/ompio/common_ompio_buffer.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -23,8 +23,8 @@
2323

2424

2525
#define OMPIO_PREPARE_BUF(_fh,_buf,_count,_datatype,_tbuf,_convertor,_max_data,_decoded_iov,_iov_count){ \
26-
opal_convertor_clone ( _fh->f_file_convertor, _convertor, 0); \
27-
opal_convertor_prepare_for_send ( _convertor, &(_datatype->super), _count, _buf );\
26+
OBJ_CONSTRUCT( _convertor, opal_convertor_t); \
27+
opal_convertor_copy_and_prepare_for_send ( _fh->f_file_convertor, &(_datatype->super), _count, _buf, CONVERTOR_SEND_CONVERSION, _convertor ); \
2828
opal_convertor_get_packed_size( _convertor, &_max_data ); \
2929
_tbuf = mca_common_ompio_alloc_buf (_fh, _max_data); \
3030
if ( NULL == _tbuf ) { \

ompi/mca/common/ompio/common_ompio_file_open.c

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -326,13 +326,13 @@ int mca_common_ompio_file_close (ompio_file_t *ompio_fh)
326326

327327
if (NULL != ompio_fh->f_mem_convertor) {
328328
opal_convertor_cleanup (ompio_fh->f_mem_convertor);
329-
//free (ompio_fh->f_mem_convertor);
329+
free (ompio_fh->f_mem_convertor);
330330
ompio_fh->f_mem_convertor = NULL;
331331
}
332332

333333
if (NULL != ompio_fh->f_file_convertor) {
334334
opal_convertor_cleanup (ompio_fh->f_file_convertor);
335-
//free (ompio_fh->f_file_convertor);
335+
free (ompio_fh->f_file_convertor);
336336
ompio_fh->f_file_convertor = NULL;
337337
}
338338

@@ -391,6 +391,13 @@ int mca_common_ompio_file_get_position (ompio_file_t *fh,
391391
{
392392
OMPI_MPI_OFFSET_TYPE off;
393393

394+
if ( 0 == fh->f_view_extent ||
395+
0 == fh->f_view_size ||
396+
0 == fh->f_etype_size ) {
397+
/* not sure whether we should raise an error here */
398+
*offset = 0;
399+
return OMPI_SUCCESS;
400+
}
394401
/* No. of copies of the entire file view */
395402
off = (fh->f_offset - fh->f_disp)/fh->f_view_extent;
396403

ompi/mca/common/ompio/common_ompio_file_view.c

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -93,7 +93,7 @@ int mca_common_ompio_set_view (ompio_file_t *fh,
9393

9494
if (NULL != fh->f_file_convertor) {
9595
opal_convertor_cleanup (fh->f_file_convertor);
96-
//free (fh->f_file_convertor);
96+
free (fh->f_file_convertor);
9797
fh->f_file_convertor = NULL;
9898
}
9999

@@ -104,17 +104,21 @@ int mca_common_ompio_set_view (ompio_file_t *fh,
104104
if ( fh->f_flags & OMPIO_UNIFORM_FVIEW ) {
105105
fh->f_flags &= ~OMPIO_UNIFORM_FVIEW;
106106
}
107+
if ( fh->f_flags & OMPIO_DATAREP_NATIVE ) {
108+
fh->f_flags &= ~OMPIO_DATAREP_NATIVE;
109+
}
107110
fh->f_datarep = strdup (datarep);
108111

109112
if ( !(strcmp(datarep, "external32") && strcmp(datarep, "EXTERNAL32"))) {
110-
fh->f_file_convertor = malloc (sizeof(opal_convertor_t));
113+
fh->f_file_convertor = malloc ( sizeof(struct opal_convertor_t) );
111114
if ( NULL == fh->f_file_convertor ) {
112115
return OMPI_ERR_OUT_OF_RESOURCE;
113116
}
114117
opal_convertor_clone (ompi_mpi_external32_convertor, fh->f_file_convertor, 0);
115118
}
116119
else {
117120
fh->f_file_convertor = opal_convertor_create (opal_local_arch, 0);
121+
fh->f_flags |= OMPIO_DATAREP_NATIVE;
118122
}
119123

120124
datatype_duplicate (filetype, &fh->f_orig_filetype );

ompi/mca/common/ompio/common_ompio_file_write.c

Lines changed: 42 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,6 @@
3535
#include <unistd.h>
3636
#include <math.h>
3737

38-
3938
int mca_common_ompio_file_write (ompio_file_t *fh,
4039
const void *buf,
4140
int count,
@@ -70,16 +69,34 @@ int mca_common_ompio_file_write (ompio_file_t *fh,
7069
return ret;
7170
}
7271

72+
bool need_to_copy = false;
73+
7374
#if OPAL_CUDA_SUPPORT
7475
int is_gpu, is_managed;
7576
mca_common_ompio_check_gpu_buf ( fh, buf, &is_gpu, &is_managed);
7677
if ( is_gpu && !is_managed ) {
78+
need_to_copy = true;
79+
}
80+
#endif
81+
82+
if ( !( fh->f_flags & OMPIO_DATAREP_NATIVE ) &&
83+
!(datatype == &ompi_mpi_byte.dt ||
84+
datatype == &ompi_mpi_char.dt )) {
85+
/* only need to copy if any of these conditions are given:
86+
1. buffer is an unmanaged CUDA buffer (checked above).
87+
2. Data representation is anything other than 'native' and
88+
3. datatype is not byte or char (i.e. it does require some actual
89+
work to be done, e.g. for external32).
90+
*/
91+
need_to_copy = true;
92+
}
93+
94+
if ( need_to_copy ) {
7795
size_t pos=0;
7896
char *tbuf=NULL;
7997
opal_convertor_t convertor;
8098

81-
OMPIO_CUDA_PREPARE_BUF(fh,buf,count,datatype,tbuf,&convertor,max_data,decoded_iov,iov_count);
82-
99+
OMPIO_PREPARE_BUF(fh,buf,count,datatype,tbuf,&convertor,max_data,decoded_iov,iov_count);
83100
opal_convertor_pack (&convertor, decoded_iov, &iov_count, &pos );
84101
opal_convertor_cleanup ( &convertor);
85102
}
@@ -93,16 +110,7 @@ int mca_common_ompio_file_write (ompio_file_t *fh,
93110
&decoded_iov,
94111
&iov_count);
95112
}
96-
#else
97-
mca_common_ompio_decode_datatype (fh,
98-
datatype,
99-
count,
100-
buf,
101-
&max_data,
102-
fh->f_mem_convertor,
103-
&decoded_iov,
104-
&iov_count);
105-
#endif
113+
106114
if ( 0 < max_data && 0 == fh->f_iov_count ) {
107115
if ( MPI_STATUS_IGNORE != status ) {
108116
status->_ucount = 0;
@@ -230,16 +238,34 @@ int mca_common_ompio_file_iwrite (ompio_file_t *fh,
230238
int i = 0; /* index into the decoded iovec of the buffer */
231239
int j = 0; /* index into the file view iovec */
232240

241+
bool need_to_copy = false;
242+
233243
#if OPAL_CUDA_SUPPORT
234244
int is_gpu, is_managed;
235245
mca_common_ompio_check_gpu_buf ( fh, buf, &is_gpu, &is_managed);
236246
if ( is_gpu && !is_managed ) {
247+
need_to_copy = true;
248+
}
249+
#endif
250+
251+
if ( !( fh->f_flags & OMPIO_DATAREP_NATIVE ) &&
252+
!(datatype == &ompi_mpi_byte.dt ||
253+
datatype == &ompi_mpi_char.dt )) {
254+
/* only need to copy if any of these conditions are given:
255+
1. buffer is an unmanaged CUDA buffer (checked above).
256+
2. Data representation is anything other than 'native' and
257+
3. datatype is not byte or char (i.e. it does require some actual
258+
work to be done, e.g. for external32).
259+
*/
260+
need_to_copy = true;
261+
}
262+
263+
if ( need_to_copy ) {
237264
size_t pos=0;
238265
char *tbuf=NULL;
239266
opal_convertor_t convertor;
240-
241-
OMPIO_CUDA_PREPARE_BUF(fh,buf,count,datatype,tbuf,&convertor,max_data,decoded_iov,iov_count);
242267

268+
OMPIO_PREPARE_BUF(fh,buf,count,datatype,tbuf,&convertor,max_data,decoded_iov,iov_count);
243269
opal_convertor_pack (&convertor, decoded_iov, &iov_count, &pos );
244270
opal_convertor_cleanup (&convertor);
245271

@@ -256,16 +282,7 @@ int mca_common_ompio_file_iwrite (ompio_file_t *fh,
256282
&decoded_iov,
257283
&iov_count);
258284
}
259-
#else
260-
mca_common_ompio_decode_datatype (fh,
261-
datatype,
262-
count,
263-
buf,
264-
&max_data,
265-
fh->f_mem_convertor,
266-
&decoded_iov,
267-
&iov_count);
268-
#endif
285+
269286
if ( 0 < max_data && 0 == fh->f_iov_count ) {
270287
ompio_req->req_ompi.req_status.MPI_ERROR = OMPI_SUCCESS;
271288
ompio_req->req_ompi.req_status._ucount = 0;

0 commit comments

Comments
 (0)