@@ -88,17 +88,33 @@ int mca_common_ompio_file_read (ompio_file_t *fh,
88
88
return ret ;
89
89
}
90
90
91
-
91
+ bool need_to_copy = false;
92
+ opal_convertor_t convertor ;
92
93
#if OPAL_CUDA_SUPPORT
93
94
int is_gpu , is_managed ;
94
- opal_convertor_t convertor ;
95
95
mca_common_ompio_check_gpu_buf ( fh , buf , & is_gpu , & is_managed );
96
96
if ( is_gpu && !is_managed ) {
97
+ need_to_copy = true;
98
+ }
99
+ #endif
100
+
101
+ if ( !( fh -> f_flags & OMPIO_DATAREP_NATIVE ) &&
102
+ !(datatype == & ompi_mpi_byte .dt ||
103
+ datatype == & ompi_mpi_char .dt )) {
104
+ /* only need to copy if any of these conditions are given:
105
+ 1. buffer is an unmanaged CUDA buffer (checked above).
106
+ 2. Data representation is anything other than 'native' and
107
+ 3. datatype is not byte or char (i.e. it does require some actual
108
+ work to be done, e.g. for external32).
109
+ */
110
+ need_to_copy = true;
111
+ }
112
+
113
+ if ( need_to_copy ) {
97
114
char * tbuf = NULL ;
98
115
99
- OMPIO_CUDA_PREPARE_BUF (fh ,buf ,count ,datatype ,tbuf ,& convertor ,max_data ,decoded_iov ,iov_count );
100
-
101
- }
116
+ OMPIO_PREPARE_READ_BUF (fh ,buf ,count ,datatype ,tbuf ,& convertor ,max_data ,decoded_iov ,iov_count );
117
+ }
102
118
else {
103
119
mca_common_ompio_decode_datatype (fh ,
104
120
datatype ,
@@ -109,16 +125,7 @@ int mca_common_ompio_file_read (ompio_file_t *fh,
109
125
& decoded_iov ,
110
126
& iov_count );
111
127
}
112
- #else
113
- mca_common_ompio_decode_datatype (fh ,
114
- datatype ,
115
- count ,
116
- buf ,
117
- & max_data ,
118
- fh -> f_mem_convertor ,
119
- & decoded_iov ,
120
- & iov_count );
121
- #endif
128
+
122
129
if ( 0 < max_data && 0 == fh -> f_iov_count ) {
123
130
if ( MPI_STATUS_IGNORE != status ) {
124
131
status -> _ucount = 0 ;
@@ -170,15 +177,14 @@ int mca_common_ompio_file_read (ompio_file_t *fh,
170
177
}
171
178
}
172
179
173
- #if OPAL_CUDA_SUPPORT
174
- if ( is_gpu && !is_managed ) {
180
+ if ( need_to_copy ) {
175
181
size_t pos = 0 ;
176
182
177
183
opal_convertor_unpack (& convertor , decoded_iov , & iov_count , & pos );
178
184
opal_convertor_cleanup (& convertor );
179
185
mca_common_ompio_release_buf (fh , decoded_iov -> iov_base );
180
186
}
181
- #endif
187
+
182
188
if (NULL != decoded_iov ) {
183
189
free (decoded_iov );
184
190
decoded_iov = NULL ;
@@ -257,13 +263,32 @@ int mca_common_ompio_file_iread (ompio_file_t *fh,
257
263
int i = 0 ; /* index into the decoded iovec of the buffer */
258
264
int j = 0 ; /* index into the file view iovec */
259
265
266
+ bool need_to_copy = false;
267
+
260
268
#if OPAL_CUDA_SUPPORT
261
269
int is_gpu , is_managed ;
262
270
mca_common_ompio_check_gpu_buf ( fh , buf , & is_gpu , & is_managed );
263
271
if ( is_gpu && !is_managed ) {
272
+ need_to_copy = true;
273
+ }
274
+ #endif
275
+
276
+ if ( !( fh -> f_flags & OMPIO_DATAREP_NATIVE ) &&
277
+ !(datatype == & ompi_mpi_byte .dt ||
278
+ datatype == & ompi_mpi_char .dt )) {
279
+ /* only need to copy if any of these conditions are given:
280
+ 1. buffer is an unmanaged CUDA buffer (checked above).
281
+ 2. Data representation is anything other than 'native' and
282
+ 3. datatype is not byte or char (i.e. it does require some actual
283
+ work to be done, e.g. for external32).
284
+ */
285
+ need_to_copy = true;
286
+ }
287
+
288
+ if ( need_to_copy ) {
264
289
char * tbuf = NULL ;
265
290
266
- OMPIO_CUDA_PREPARE_BUF (fh ,buf ,count ,datatype ,tbuf ,& ompio_req -> req_convertor ,max_data ,decoded_iov ,iov_count );
291
+ OMPIO_PREPARE_READ_BUF (fh ,buf ,count ,datatype ,tbuf ,& ompio_req -> req_convertor ,max_data ,decoded_iov ,iov_count );
267
292
268
293
ompio_req -> req_tbuf = tbuf ;
269
294
ompio_req -> req_size = max_data ;
@@ -278,16 +303,7 @@ int mca_common_ompio_file_iread (ompio_file_t *fh,
278
303
& decoded_iov ,
279
304
& iov_count );
280
305
}
281
- #else
282
- mca_common_ompio_decode_datatype (fh ,
283
- datatype ,
284
- count ,
285
- buf ,
286
- & max_data ,
287
- fh -> f_mem_convertor ,
288
- & decoded_iov ,
289
- & iov_count );
290
- #endif
306
+
291
307
if ( 0 < max_data && 0 == fh -> f_iov_count ) {
292
308
ompio_req -> req_ompi .req_status .MPI_ERROR = OMPI_SUCCESS ;
293
309
ompio_req -> req_ompi .req_status ._ucount = 0 ;
0 commit comments