@@ -158,6 +158,10 @@ impl InputFormatTextBase for InputFormatCSV {
158
158
}
159
159
160
160
state. rows_to_skip -= 1 ;
161
+ tracing:: debug!(
162
+ "csv aligner: skip a header row, remain {}" ,
163
+ state. rows_to_skip
164
+ ) ;
161
165
state. rows += 1 ;
162
166
endlen = 0 ;
163
167
}
@@ -179,7 +183,7 @@ impl InputFormatTextBase for InputFormatCSV {
179
183
path : state. path . to_string ( ) ,
180
184
batch_id : state. batch_id ,
181
185
offset : 0 ,
182
- start_row : None ,
186
+ start_row : Some ( state . rows ) ,
183
187
} ;
184
188
185
189
while !buf. is_empty ( ) {
@@ -191,9 +195,7 @@ impl InputFormatTextBase for InputFormatCSV {
191
195
endlen += n_end;
192
196
out_pos += n_out;
193
197
match result {
194
- ReadRecordResult :: InputEmpty => {
195
- break ;
196
- }
198
+ ReadRecordResult :: InputEmpty => break ,
197
199
ReadRecordResult :: OutputFull => {
198
200
return Err ( csv_error (
199
201
"output more than input" ,
@@ -243,21 +245,37 @@ impl InputFormatTextBase for InputFormatCSV {
243
245
}
244
246
}
245
247
248
+ reader. n_end = endlen;
249
+ out_tmp. truncate ( out_pos) ;
246
250
if row_batch. row_ends . is_empty ( ) {
247
- reader. out . extend_from_slice ( & out_tmp[ ..out_pos] ) ;
251
+ tracing:: debug!(
252
+ "csv aligner: {} + {} bytes => 0 rows" ,
253
+ reader. out. len( ) ,
254
+ buf_in. len( ) ,
255
+ ) ;
256
+ reader. out . extend_from_slice ( & out_tmp) ;
248
257
Ok ( vec ! [ ] )
249
258
} else {
250
- state. rows += row_batch. row_ends . len ( ) ;
251
259
let last_remain = mem:: take ( & mut reader. out ) ;
260
+
261
+ state. batch_id += 1 ;
262
+ state. rows += row_batch. row_ends . len ( ) ;
252
263
reader. out . extend_from_slice ( & out_tmp[ row_batch_end..] ) ;
264
+
265
+ tracing:: debug!(
266
+ "csv aligner: {} + {} bytes => {} rows + {} bytes remain" ,
267
+ last_remain. len( ) ,
268
+ buf_in. len( ) ,
269
+ row_batch. row_ends. len( ) ,
270
+ reader. out. len( )
271
+ ) ;
272
+
253
273
out_tmp. truncate ( row_batch_end) ;
254
- row_batch. start_row = Some ( state. rows ) ;
255
274
row_batch. data = if last_remain. is_empty ( ) {
256
275
out_tmp
257
276
} else {
258
277
vec ! [ last_remain, out_tmp] . concat ( )
259
278
} ;
260
- state. batch_id += 1 ;
261
279
Ok ( vec ! [ row_batch] )
262
280
}
263
281
}
0 commit comments