@@ -149,37 +149,171 @@ impl VirtualColumnReader {
149
149
) )
150
150
}
151
151
152
- pub fn deserialize_virtual_columns (
152
+ // pub fn deserialize_virtual_columns(
153
+ // &self,
154
+ // mut data_block: DataBlock,
155
+ // virtual_data: Option<VirtualBlockReadResult>,
156
+ //) -> Result<DataBlock> {
157
+ // let orig_schema = virtual_data
158
+ // .as_ref()
159
+ // .map(|virtual_data| virtual_data.schema.clone())
160
+ // .unwrap_or_default();
161
+ // let record_batch = virtual_data
162
+ // .map(|virtual_data| {
163
+ // let columns_chunks = virtual_data.data.columns_chunks()?;
164
+ // column_chunks_to_record_batch(
165
+ // &virtual_data.schema,
166
+ // virtual_data.num_rows,
167
+ // &columns_chunks,
168
+ // &virtual_data.compression,
169
+ // )
170
+ // })
171
+ // .transpose()?;
172
+
173
+ // // If the virtual column has already generated, add it directly,
174
+ // // otherwise extract it from the source column
175
+ // let func_ctx = self.ctx.get_function_context()?;
176
+ // for virtual_column_field in self.virtual_column_info.virtual_column_fields.iter() {
177
+ // let name = format!("{}", virtual_column_field.column_id);
178
+ // if let Some(arrow_array) = record_batch
179
+ // .as_ref()
180
+ // .and_then(|r| r.column_by_name(&name).cloned())
181
+ // {
182
+ // let orig_field = orig_schema.field_with_name(&name).unwrap();
183
+ // let orig_type: DataType = orig_field.data_type().into();
184
+ // let value = Value::Column(Column::from_arrow_rs(arrow_array, &orig_type)?);
185
+ // let data_type: DataType = virtual_column_field.data_type.as_ref().into();
186
+ // let column = if orig_type != data_type {
187
+ // let cast_func_name = format!(
188
+ // "to_{}",
189
+ // data_type.remove_nullable().to_string().to_lowercase()
190
+ // );
191
+ // let (cast_value, cast_data_type) = eval_function(
192
+ // None,
193
+ // &cast_func_name,
194
+ // [(value, orig_type)],
195
+ // &func_ctx,
196
+ // data_block.num_rows(),
197
+ // &BUILTIN_FUNCTIONS,
198
+ // )?;
199
+ // BlockEntry::new(cast_data_type, cast_value)
200
+ // } else {
201
+ // BlockEntry::new(data_type, value)
202
+ // };
203
+ // data_block.add_column(column);
204
+ // continue;
205
+ // }
206
+ // let src_index = self
207
+ // .source_schema
208
+ // .index_of(&virtual_column_field.source_name)
209
+ // .unwrap();
210
+ // let source = data_block.get_by_offset(src_index);
211
+ // let src_arg = (source.value.clone(), source.data_type.clone());
212
+ // let path_arg = (
213
+ // Value::Scalar(virtual_column_field.key_paths.clone()),
214
+ // DataType::String,
215
+ // );
216
+
217
+ // let (value, data_type) = eval_function(
218
+ // None,
219
+ // "get_by_keypath",
220
+ // [src_arg, path_arg],
221
+ // &func_ctx,
222
+ // data_block.num_rows(),
223
+ // &BUILTIN_FUNCTIONS,
224
+ // )?;
225
+
226
+ // let column = if let Some(cast_func_name) = &virtual_column_field.cast_func_name {
227
+ // let (cast_value, cast_data_type) = eval_function(
228
+ // None,
229
+ // cast_func_name,
230
+ // [(value, data_type)],
231
+ // &func_ctx,
232
+ // data_block.num_rows(),
233
+ // &BUILTIN_FUNCTIONS,
234
+ // )?;
235
+ // BlockEntry::new(cast_data_type, cast_value)
236
+ // } else {
237
+ // BlockEntry::new(data_type, value)
238
+ // };
239
+ // data_block.add_column(column);
240
+ // }
241
+
242
+ // Ok(data_block)
243
+ //}
244
+ /// Builds a `VirtualColumnDataPaster`: deserializes the virtual column data (if any) into record batches, according to `batch_size_hint`.
245
+ pub fn try_create_paster (
153
246
& self ,
154
- mut data_block : DataBlock ,
155
247
virtual_data : Option < VirtualBlockReadResult > ,
156
- ) -> Result < DataBlock > {
248
+ batch_size_hint : Option < usize > ,
249
+ ) -> Result < VirtualColumnDataPaster > {
157
250
let orig_schema = virtual_data
158
251
. as_ref ( )
159
252
. map ( |virtual_data| virtual_data. schema . clone ( ) )
160
253
. unwrap_or_default ( ) ;
161
- let record_batch = virtual_data
162
- . map ( |virtual_data| {
163
- let columns_chunks = virtual_data. data . columns_chunks ( ) ?;
164
- column_chunks_to_record_batch (
165
- & virtual_data. schema ,
166
- virtual_data. num_rows ,
167
- & columns_chunks,
168
- & virtual_data. compression ,
169
- )
170
- } )
171
- . transpose ( ) ?;
254
+
255
+ let record_batches = if let Some ( virtual_data) = virtual_data {
256
+ let columns_chunks = virtual_data. data . columns_chunks ( ) ?;
257
+ let chunks = column_chunks_to_record_batch (
258
+ & self . virtual_column_info . schema ,
259
+ virtual_data. num_rows ,
260
+ & columns_chunks,
261
+ & virtual_data. compression ,
262
+ batch_size_hint,
263
+ ) ?;
264
+ Some ( chunks)
265
+ } else {
266
+ None
267
+ } ;
268
+
269
+ let function_context = self . ctx . get_function_context ( ) ?;
270
+
271
+ // Unfortunately, the paster cannot hold references to the fields being cloned here,
272
+ // since the caller `DeserializeDataTransform` will indirectly take a mutable reference
273
+ // to `VirtualColumnReader`.
274
+ Ok ( VirtualColumnDataPaster {
275
+ record_batches,
276
+ function_context,
277
+ next_record_batch_index : 0 ,
278
+ virtual_column_fields : self . virtual_column_info . virtual_column_fields . clone ( ) ,
279
+ source_schema : self . source_schema . clone ( ) ,
280
+ orig_schema,
281
+ } )
282
+ }
283
+ }
284
+
285
+ pub struct VirtualColumnDataPaster { // State built by `try_create_paster` and read by `paste_virtual_column`.
286
+ record_batches : Option < Vec < RecordBatch > > , // Deserialized virtual column data; `None` when `try_create_paster` received no virtual data.
287
+ next_record_batch_index : usize , // Index into `record_batches` for the next `paste_virtual_column` call; starts at 0 and advances by one per call.
288
+ function_context : FunctionContext , // Function context obtained from the reader's `ctx` when the paster was created.
289
+ virtual_column_fields : Vec < VirtualColumnField > , // Cloned from the reader's `virtual_column_info.virtual_column_fields`.
290
+ source_schema : TableSchemaRef , // Cloned from the reader's `source_schema`.
291
+ orig_schema : TableSchemaRef , // Schema carried by the virtual data (default schema when there is none); used to look up each stored column's type by name.
292
+ }
293
+
294
+ impl VirtualColumnDataPaster {
295
+ /// Paste virtual column to `data_block` if necessary
296
+ pub fn paste_virtual_column ( & mut self , mut data_block : DataBlock ) -> Result < DataBlock > {
297
+ let record_batch = if let Some ( record_batches) = & self . record_batches {
298
+ assert ! ( record_batches. len( ) > self . next_record_batch_index) ;
299
+ Some ( & record_batches[ self . next_record_batch_index ] )
300
+ } else {
301
+ None
302
+ } ;
303
+
304
+ self . next_record_batch_index += 1 ;
305
+
306
+ let func_ctx = & self . function_context ;
172
307
173
308
// If the virtual column has already been generated, add it directly,
174
309
// otherwise extract it from the source column
175
- let func_ctx = self . ctx . get_function_context ( ) ?;
176
- for virtual_column_field in self . virtual_column_info . virtual_column_fields . iter ( ) {
310
+ for virtual_column_field in self . virtual_column_fields . iter ( ) {
177
311
let name = format ! ( "{}" , virtual_column_field. column_id) ;
178
312
if let Some ( arrow_array) = record_batch
179
313
. as_ref ( )
180
314
. and_then ( |r| r. column_by_name ( & name) . cloned ( ) )
181
315
{
182
- let orig_field = orig_schema. field_with_name ( & name) . unwrap ( ) ;
316
+ let orig_field = self . orig_schema . field_with_name ( & name) . unwrap ( ) ;
183
317
let orig_type: DataType = orig_field. data_type ( ) . into ( ) ;
184
318
let value = Value :: Column ( Column :: from_arrow_rs ( arrow_array, & orig_type) ?) ;
185
319
let data_type: DataType = virtual_column_field. data_type . as_ref ( ) . into ( ) ;
@@ -240,5 +374,52 @@ impl VirtualColumnReader {
240
374
}
241
375
242
376
Ok ( data_block)
377
+
378
+ // for virtual_column_field in self.virtual_column_fields.iter() {
379
+ // if let Some(arrow_array) =
380
+ // record_batch.and_then(|r| r.column_by_name(&virtual_column_field.name).cloned())
381
+ // {
382
+ // let data_type: DataType = virtual_column_field.data_type.as_ref().into();
383
+ // let value = Value::Column(Column::from_arrow_rs(arrow_array, &data_type)?);
384
+ // data_block.add_column(BlockEntry::new(data_type, value));
385
+ // continue;
386
+ // }
387
+ // let src_index = self
388
+ // .source_schema
389
+ // .index_of(&virtual_column_field.source_name)
390
+ // .unwrap();
391
+ // let source = data_block.get_by_offset(src_index);
392
+ // let src_arg = (source.value.clone(), source.data_type.clone());
393
+ // let path_arg = (
394
+ // Value::Scalar(virtual_column_field.key_paths.clone()),
395
+ // DataType::String,
396
+ // );
397
+
398
+ // let (value, data_type) = eval_function(
399
+ // None,
400
+ // "get_by_keypath",
401
+ // [src_arg, path_arg],
402
+ // func_ctx,
403
+ // data_block.num_rows(),
404
+ // &BUILTIN_FUNCTIONS,
405
+ // )?;
406
+
407
+ // let column = if let Some(cast_func_name) = &virtual_column_field.cast_func_name {
408
+ // let (cast_value, cast_data_type) = eval_function(
409
+ // None,
410
+ // cast_func_name,
411
+ // [(value, data_type)],
412
+ // func_ctx,
413
+ // data_block.num_rows(),
414
+ // &BUILTIN_FUNCTIONS,
415
+ // )?;
416
+ // BlockEntry::new(cast_data_type, cast_value)
417
+ // } else {
418
+ // BlockEntry::new(data_type, value)
419
+ // };
420
+ // data_block.add_column(column);
421
+ //}
422
+
423
+ // Ok(data_block)
243
424
}
244
425
}
0 commit comments