Skip to content
This repository was archived by the owner on Dec 29, 2021. It is now read-only.

Commit 5745219

Browse files
committed
2 parents 7a5e19b + 94c2240 commit 5745219

File tree

7 files changed

+2030
-2
lines changed

7 files changed

+2030
-2
lines changed

.gitignore

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
/target
22
**/*.rs.bk
33
Cargo.lock
4-
.idea/
4+
.idea/
5+
/test/*/*.feather

Cargo.toml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,4 +7,5 @@ edition = "2018"
77
[dependencies]
88
arrow = { git = "https://github.com/apache/arrow"}
99
num = "0.2"
10-
num-traits = "0.2"
10+
num-traits = "0.2"byteorder = "1"
11+
flatbuffers = "0.5"

src/dataframe.rs

Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -333,6 +333,44 @@ impl DataFrame {
333333
columns: table.columns,
334334
}
335335
}
336+
337+
pub fn from_feather(path: &str) -> Result<Self, ArrowError> {
338+
use crate::io::feather::*;
339+
340+
let mut reader = FeatherReader::new(File::open(path)?);
341+
let batch = reader.read()?;
342+
343+
let schema = batch.schema().clone();
344+
345+
let table = crate::table::Table::from_record_batches(schema.clone(), vec![batch]);
346+
347+
Ok(DataFrame {
348+
schema,
349+
columns: table.columns,
350+
})
351+
}
352+
353+
/// Write dataframe to a feather file
354+
///
355+
/// Data is currently written as individual batches (as Arrow doesn't yet support slicing).
356+
/// This will be rectified when the above condition is met.
357+
pub fn to_feather(&self, path: &str) -> Result<(), ArrowError> {
358+
use crate::io::feather::*;
359+
360+
let record_batches = self.to_record_batches();
361+
362+
record_batches.iter().enumerate().for_each(|(i, batch)| {
363+
let mut file_name = String::new();
364+
file_name.push_str(path);
365+
file_name.push_str("_");
366+
file_name.push_str(&i.to_string());
367+
file_name.push_str(".feather");
368+
batch.write_feather(&file_name).unwrap();
369+
});
370+
371+
Ok(())
372+
}
373+
336374
}
337375

338376
mod tests {
@@ -408,4 +446,37 @@ mod tests {
408446

409447
assert_eq!(3.335724, abs[0].value(0));
410448
}
449+
450+
#[test]
451+
fn feather_io() {
452+
let mut dataframe = DataFrame::from_csv("./test/data/uk_cities_with_headers.csv", None);
453+
let a = dataframe.column_by_name("lat");
454+
let b = dataframe.column_by_name("lng");
455+
let sum = ScalarFunctions::add(column_to_arrays_f64(a), column_to_arrays_f64(b));
456+
// TODO, make this better
457+
let sum: Vec<ArrayRef> = sum
458+
.unwrap()
459+
.into_iter()
460+
.map(|p| Arc::new(p) as ArrayRef)
461+
.collect();
462+
dataframe = dataframe.with_column(
463+
"lat_lng_sum",
464+
Column::from_arrays(sum, Field::new("lat_lng_sum", DataType::Float64, true)),
465+
);
466+
467+
let city = dataframe.column_by_name("city");
468+
let lowercase = ScalarFunctions::lower(column_to_arrays_str(city));
469+
let lowercase: Vec<ArrayRef> = lowercase
470+
.unwrap()
471+
.into_iter()
472+
.map(|p| Arc::new(p) as ArrayRef)
473+
.collect();
474+
dataframe = dataframe.with_column(
475+
"city_lower",
476+
Column::from_arrays(lowercase, Field::new("city_lower", DataType::Utf8, true)),
477+
);
478+
479+
let write = dataframe.to_feather("./test/data/uk_cities");
480+
assert!(write.is_ok());
481+
}
411482
}

0 commit comments

Comments
 (0)