Skip to content
This repository was archived by the owner on Dec 29, 2021. It is now read-only.

Commit 7b97f4e

Browse files
committed
re-enable some dataframe operations, update README
1 parent 22b92be commit 7b97f4e

File tree

2 files changed

+80
-122
lines changed

2 files changed

+80
-122
lines changed

README.md

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@ To that end, we're trying to support CSV, JSON, and perhaps other simpler file f
3939
- IO Support
4040
- [ ] CSV
4141
- [X] Read
42-
- [ ] Write
42+
- [X] Write
4343
- [ ] JSON
4444
- [X] Read (submitted to Arrow)
4545
- [ ] Write
@@ -52,11 +52,11 @@ To that end, we're trying to support CSV, JSON, and perhaps other simpler file f
5252
- DataFrame Operations
5353
<!-- - [x] Read CSV into dataframe -->
5454
- [X] Select single column
55-
- [ ] Select subset of columns, drop columns
55+
- [X] Select subset of columns, drop columns
5656
- [X] Add or remove columns
5757
- [X] Rename columns
5858
- [ ] Create dataframe from record batches (a `Vec<RecordBatch>` as well as an iterator)
59-
- [ ] Write dataframe to CSV (and other formats as and when Arrow supports them)
59+
- [X] Write dataframe to CSV (and other formats as and when Arrow supports them)
6060
- [ ] Sort dataframes
6161
- [ ] Grouped operations
6262
- [ ] Filter dataframes

src/dataframe.rs

Lines changed: 77 additions & 119 deletions
Original file line numberDiff line numberDiff line change
@@ -17,49 +17,11 @@ use std::sync::Arc;
1717

1818
use crate::error::DataFrameError;
1919

20-
//impl From<&ArrayRef> for &PrimitiveArray<BooleanType> {
21-
// fn from(array: &ArrayRef) -> Self {
22-
// array.as_any().downcast_ref::<BooleanArray>().unwrap()
23-
// }
24-
//}
25-
26-
//impl<T: ArrowPrimitiveType> From<&Array> for &PrimitiveArray<T> {
27-
// fn from(array: &Array) -> Self {
28-
// match array.data_type() {
29-
// DataType::Boolean => array.as_any().downcast_ref::<T>().unwrap()
30-
// }
31-
//// _ => unimplemented!("Casting array to other primitive types is not implemented")
32-
// }
33-
//}
34-
35-
//fn array_to_primitive<T>(array: &Array) -> &PrimitiveArray<T>
36-
// where
37-
// T: ArrowPrimitiveType,
38-
//{
39-
// match array.data_type() {
40-
// DataType::Boolean => {
41-
// array.as_any().downcast_ref::<BooleanArray>().unwrap()
42-
// }
43-
// _ => unimplemented!("Casting for other array types is not implemented")
44-
// }
45-
//}
46-
4720
pub struct DataFrame {
4821
schema: Arc<Schema>,
4922
columns: Vec<Column>,
5023
}
5124

52-
// struct CsvDataSource {
53-
// reader: CsvReader,
54-
// }
55-
56-
// impl Iterator for CsvDataSource {
57-
// type Item = Result<RecordBatch, DataFrameError>;
58-
59-
// fn next(&mut self) -> Result<Option<Self::Item>, arrow::error::ArrowError> {
60-
// Some(Ok(self.reader.next()))
61-
// }
62-
// }
6325

6426
impl DataFrame {
6527
/// Create an empty `DataFrame`
@@ -213,100 +175,96 @@ impl DataFrame {
213175
/// Returns dataframe with specified columns selected.
214176
///
215177
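/// Names that do not match any column are ignored. Passing `"*"` selects every
/// column. The result keeps the schema's column order, not the order of `col_names`.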
216-
// pub fn select(&mut self, col_names: Vec<&str>) -> Self {
217-
// // get the names of columns from the schema, and match them with supplied
218-
// let mut col_num: i16 = -1;
219-
// let schema = &self.schema.clone();
220-
// let field_names: Vec<(usize, &str)> = schema
221-
// .fields()
222-
// .iter()
223-
// .map(|c| {
224-
// col_num += 1;
225-
// (col_num as usize, c.name().as_str())
226-
// })
227-
// .collect();
228-
229-
// // filter names
230-
// let filter_cols: Vec<(usize, &str)> = if col_names.contains(&"*") {
231-
// field_names
232-
// } else {
233-
// // TODO follow the order of user-supplied column names
234-
// field_names
235-
// .into_iter()
236-
// .filter(|(col, name)| col_names.contains(name))
237-
// .collect()
238-
// };
239-
240-
// // let columns = filter_cols.clone().iter().map(move |c| self.columns[c.0]).collect();
178+
pub fn select(&mut self, col_names: Vec<&str>) -> Self {
179+
// get the names of columns from the schema, and match them with supplied
180+
let mut col_num: i16 = -1;
181+
let schema = &self.schema.clone();
182+
let field_names: Vec<(usize, &str)> = schema
183+
.fields()
184+
.iter()
185+
.map(|c| {
186+
col_num += 1;
187+
(col_num as usize, c.name().as_str())
188+
})
189+
.collect();
241190

242-
// let mut columns = vec![];
191+
// filter names
192+
let filter_cols: Vec<(usize, &str)> = if col_names.contains(&"*") {
193+
field_names
194+
} else {
195+
// TODO follow the order of user-supplied column names
196+
field_names
197+
.into_iter()
198+
.filter(|(col, name)| col_names.contains(name))
199+
.collect()
200+
};
243201

244-
// for (i,u) in filter_cols.clone() {
245-
// let c = &self.columns[i];
246-
// columns.push(c);
247-
// }
202+
let mut columns = vec![];
248203

249-
// let new_schema = Arc::new(Schema::new(
250-
// filter_cols
251-
// .iter()
252-
// .map(|c| schema.field(c.0).clone())
253-
// .collect(),
254-
// ));
204+
for (i, u) in filter_cols.clone() {
205+
let c = &self.columns[i];
206+
columns.push(c.clone());
207+
}
255208

256-
// dbg!(filter_cols);
209+
let new_schema = Arc::new(Schema::new(
210+
filter_cols
211+
.iter()
212+
.map(|c| schema.field(c.0).clone())
213+
.collect(),
214+
));
257215

258-
// DataFrame::from_columns(new_schema, columns)
259-
// }
216+
DataFrame::from_columns(new_schema, columns)
217+
}
260218

261219
/// Returns a dataframe with specified columns dropped.
262220
///
263221
/// Names that do not match any column are ignored.
264-
// pub fn drop(&self, col_names: Vec<&str>) -> Self {
265-
// // get the names of columns from the schema, and match them with supplied
266-
// let mut col_num: i16 = -1;
267-
// let schema = self.schema.clone();
268-
// let field_names: Vec<(usize, &str)> = schema
269-
// .fields()
270-
// .into_iter()
271-
// .map(|c| {
272-
// col_num += 1;
273-
// (col_num as usize, c.name().as_str())
274-
// })
275-
// .collect();
276-
277-
// // filter names
278-
// let filter_cols: Vec<(usize, &str)> = {
279-
// // TODO follow the order of user-supplied column names
280-
// field_names
281-
// .into_iter()
282-
// .filter(|(col, name)| !col_names.contains(name))
283-
// .collect()
284-
// };
285-
286-
// // construct dataframe with selected columns
287-
// DataFrame {
288-
// schema: Arc::new(Schema::new(
289-
// filter_cols
290-
// .iter()
291-
// .map(|c| schema.field(c.0).clone())
292-
// .collect(),
293-
// )),
294-
// columns: filter_cols
295-
// .into_iter()
296-
// .map(move |c| self.columns[c.0])
297-
// .collect(),
298-
// }
299-
// }
222+
pub fn drop(&self, col_names: Vec<&str>) -> Self {
223+
// get the names of columns from the schema, and match them with supplied
224+
let mut col_num: i16 = -1;
225+
let schema = self.schema.clone();
226+
let field_names: Vec<(usize, &str)> = schema
227+
.fields()
228+
.into_iter()
229+
.map(|c| {
230+
col_num += 1;
231+
(col_num as usize, c.name().as_str())
232+
})
233+
.collect();
234+
235+
// filter names
236+
let filter_cols: Vec<(usize, &str)> = {
237+
// TODO follow the order of user-supplied column names
238+
field_names
239+
.into_iter()
240+
.filter(|(col, name)| !col_names.contains(name))
241+
.collect()
242+
};
243+
244+
// construct dataframe with selected columns
245+
DataFrame {
246+
schema: Arc::new(Schema::new(
247+
filter_cols
248+
.iter()
249+
.map(|c| schema.field(c.0).clone())
250+
.collect(),
251+
)),
252+
columns: filter_cols
253+
.iter()
254+
.map(move |c| self.columns[c.0].clone())
255+
.collect(),
256+
}
257+
}
300258

301259
/// Create a dataframe from an Arrow Table.
302260
///
303261
/// Arrow Tables are not yet in the Rust library, and we are hashing them out here
304-
// pub fn from_table(table: crate::table::Table) -> Self {
305-
// DataFrame {
306-
// schema: table.schema().clone(),
307-
// columns: *table.columns(),
308-
// }
309-
// }
262+
pub fn from_table(table: crate::table::Table) -> Self {
263+
DataFrame {
264+
schema: table.schema().clone(),
265+
columns: table.columns().to_vec(),
266+
}
267+
}
310268

311269
pub fn from_csv(path: &str, schema: Option<Arc<Schema>>) -> Self {
312270
let file = File::open(path).unwrap();

0 commit comments

Comments
 (0)