@@ -333,6 +333,44 @@ impl DataFrame {
333
333
columns : table. columns ,
334
334
}
335
335
}
336
+
337
+ pub fn from_feather ( path : & str ) -> Result < Self , ArrowError > {
338
+ use crate :: io:: feather:: * ;
339
+
340
+ let mut reader = FeatherReader :: new ( File :: open ( path) ?) ;
341
+ let batch = reader. read ( ) ?;
342
+
343
+ let schema = batch. schema ( ) . clone ( ) ;
344
+
345
+ let table = crate :: table:: Table :: from_record_batches ( schema. clone ( ) , vec ! [ batch] ) ;
346
+
347
+ Ok ( DataFrame {
348
+ schema,
349
+ columns : table. columns ,
350
+ } )
351
+ }
352
+
353
+ /// Write dataframe to a feather file
354
+ ///
355
+ /// Data is currently written as individual batches (as Arrow doesn't yet support slicing).
356
+ /// This will be rectified when the above condition is met.
357
+ pub fn to_feather ( & self , path : & str ) -> Result < ( ) , ArrowError > {
358
+ use crate :: io:: feather:: * ;
359
+
360
+ let record_batches = self . to_record_batches ( ) ;
361
+
362
+ record_batches. iter ( ) . enumerate ( ) . for_each ( |( i, batch) | {
363
+ let mut file_name = String :: new ( ) ;
364
+ file_name. push_str ( path) ;
365
+ file_name. push_str ( "_" ) ;
366
+ file_name. push_str ( & i. to_string ( ) ) ;
367
+ file_name. push_str ( ".feather" ) ;
368
+ batch. write_feather ( & file_name) . unwrap ( ) ;
369
+ } ) ;
370
+
371
+ Ok ( ( ) )
372
+ }
373
+
336
374
}
337
375
338
376
mod tests {
@@ -408,4 +446,37 @@ mod tests {
408
446
409
447
assert_eq ! ( 3.335724 , abs[ 0 ] . value( 0 ) ) ;
410
448
}
449
+
450
+ #[ test]
451
+ fn feather_io ( ) {
452
+ let mut dataframe = DataFrame :: from_csv ( "./test/data/uk_cities_with_headers.csv" , None ) ;
453
+ let a = dataframe. column_by_name ( "lat" ) ;
454
+ let b = dataframe. column_by_name ( "lng" ) ;
455
+ let sum = ScalarFunctions :: add ( column_to_arrays_f64 ( a) , column_to_arrays_f64 ( b) ) ;
456
+ // TODO, make this better
457
+ let sum: Vec < ArrayRef > = sum
458
+ . unwrap ( )
459
+ . into_iter ( )
460
+ . map ( |p| Arc :: new ( p) as ArrayRef )
461
+ . collect ( ) ;
462
+ dataframe = dataframe. with_column (
463
+ "lat_lng_sum" ,
464
+ Column :: from_arrays ( sum, Field :: new ( "lat_lng_sum" , DataType :: Float64 , true ) ) ,
465
+ ) ;
466
+
467
+ let city = dataframe. column_by_name ( "city" ) ;
468
+ let lowercase = ScalarFunctions :: lower ( column_to_arrays_str ( city) ) ;
469
+ let lowercase: Vec < ArrayRef > = lowercase
470
+ . unwrap ( )
471
+ . into_iter ( )
472
+ . map ( |p| Arc :: new ( p) as ArrayRef )
473
+ . collect ( ) ;
474
+ dataframe = dataframe. with_column (
475
+ "city_lower" ,
476
+ Column :: from_arrays ( lowercase, Field :: new ( "city_lower" , DataType :: Utf8 , true ) ) ,
477
+ ) ;
478
+
479
+ let write = dataframe. to_feather ( "./test/data/uk_cities" ) ;
480
+ assert ! ( write. is_ok( ) ) ;
481
+ }
411
482
}
0 commit comments