@@ -17,49 +17,11 @@ use std::sync::Arc;
17
17
18
18
use crate :: error:: DataFrameError ;
19
19
20
- //impl From<&ArrayRef> for &PrimitiveArray<BooleanType> {
21
- // fn from(array: &ArrayRef) -> Self {
22
- // array.as_any().downcast_ref::<BooleanArray>().unwrap()
23
- // }
24
- //}
25
-
26
- //impl<T: ArrowPrimitiveType> From<&Array> for &PrimitiveArray<T> {
27
- // fn from(array: &Array) -> Self {
28
- // match array.data_type() {
29
- // DataType::Boolean => array.as_any().downcast_ref::<T>().unwrap()
30
- // }
31
- //// _ => unimplemented!("Casting array to other primitive types is not implemented")
32
- // }
33
- //}
34
-
35
- //fn array_to_primitive<T>(array: &Array) -> &PrimitiveArray<T>
36
- // where
37
- // T: ArrowPrimitiveType,
38
- //{
39
- // match array.data_type() {
40
- // DataType::Boolean => {
41
- // array.as_any().downcast_ref::<BooleanArray>().unwrap()
42
- // }
43
- // _ => unimplemented!("Casting for other array types is not implemented")
44
- // }
45
- //}
46
-
47
20
pub struct DataFrame {
48
21
schema : Arc < Schema > ,
49
22
columns : Vec < Column > ,
50
23
}
51
24
52
- // struct CsvDataSource {
53
- // reader: CsvReader,
54
- // }
55
-
56
- // impl Iterator for CsvDataSource {
57
- // type Item = Result<RecordBatch, DataFrameError>;
58
-
59
- // fn next(&mut self) -> Result<Option<Self::Item>, arrow::error::ArrowError> {
60
- // Some(Ok(self.reader.next()))
61
- // }
62
- // }
63
25
64
26
impl DataFrame {
65
27
/// Create an empty `DataFrame`
@@ -213,100 +175,96 @@ impl DataFrame {
213
175
/// Returns a dataframe with specified columns selected.
214
176
///
215
177
/// If a column name does not exist, it is omitted.
216
- // pub fn select(&mut self, col_names: Vec<&str>) -> Self {
217
- // // get the names of columns from the schema, and match them with supplied
218
- // let mut col_num: i16 = -1;
219
- // let schema = &self.schema.clone();
220
- // let field_names: Vec<(usize, &str)> = schema
221
- // .fields()
222
- // .iter()
223
- // .map(|c| {
224
- // col_num += 1;
225
- // (col_num as usize, c.name().as_str())
226
- // })
227
- // .collect();
228
-
229
- // // filter names
230
- // let filter_cols: Vec<(usize, &str)> = if col_names.contains(&"*") {
231
- // field_names
232
- // } else {
233
- // // TODO follow the order of user-supplied column names
234
- // field_names
235
- // .into_iter()
236
- // .filter(|(col, name)| col_names.contains(name))
237
- // .collect()
238
- // };
239
-
240
- // // let columns = filter_cols.clone().iter().map(move |c| self.columns[c.0]).collect();
178
+ pub fn select ( & mut self , col_names : Vec < & str > ) -> Self {
179
+ // get the names of columns from the schema, and match them with supplied
180
+ let mut col_num: i16 = -1 ;
181
+ let schema = & self . schema . clone ( ) ;
182
+ let field_names: Vec < ( usize , & str ) > = schema
183
+ . fields ( )
184
+ . iter ( )
185
+ . map ( |c| {
186
+ col_num += 1 ;
187
+ ( col_num as usize , c. name ( ) . as_str ( ) )
188
+ } )
189
+ . collect ( ) ;
241
190
242
- // let mut columns = vec![];
191
+ // filter names
192
+ let filter_cols: Vec < ( usize , & str ) > = if col_names. contains ( & "*" ) {
193
+ field_names
194
+ } else {
195
+ // TODO follow the order of user-supplied column names
196
+ field_names
197
+ . into_iter ( )
198
+ . filter ( |( col, name) | col_names. contains ( name) )
199
+ . collect ( )
200
+ } ;
243
201
244
- // for (i,u) in filter_cols.clone() {
245
- // let c = &self.columns[i];
246
- // columns.push(c);
247
- // }
202
+ let mut columns = vec ! [ ] ;
248
203
249
- // let new_schema = Arc::new(Schema::new(
250
- // filter_cols
251
- // .iter()
252
- // .map(|c| schema.field(c.0).clone())
253
- // .collect(),
254
- // ));
204
+ for ( i, u) in filter_cols. clone ( ) {
205
+ let c = & self . columns [ i] ;
206
+ columns. push ( c. clone ( ) ) ;
207
+ }
255
208
256
- // dbg!(filter_cols);
209
+ let new_schema = Arc :: new ( Schema :: new (
210
+ filter_cols
211
+ . iter ( )
212
+ . map ( |c| schema. field ( c. 0 ) . clone ( ) )
213
+ . collect ( ) ,
214
+ ) ) ;
257
215
258
- // DataFrame::from_columns(new_schema, columns)
259
- // }
216
+ DataFrame :: from_columns ( new_schema, columns)
217
+ }
260
218
261
219
/// Returns a dataframe with specified columns dropped.
262
220
///
263
221
/// If a column name does not exist, it is omitted.
264
- // pub fn drop(&self, col_names: Vec<&str>) -> Self {
265
- // // get the names of columns from the schema, and match them with supplied
266
- // let mut col_num: i16 = -1;
267
- // let schema = self.schema.clone();
268
- // let field_names: Vec<(usize, &str)> = schema
269
- // .fields()
270
- // .into_iter()
271
- // .map(|c| {
272
- // col_num += 1;
273
- // (col_num as usize, c.name().as_str())
274
- // })
275
- // .collect();
276
-
277
- // // filter names
278
- // let filter_cols: Vec<(usize, &str)> = {
279
- // // TODO follow the order of user-supplied column names
280
- // field_names
281
- // .into_iter()
282
- // .filter(|(col, name)| !col_names.contains(name))
283
- // .collect()
284
- // };
285
-
286
- // // construct dataframe with selected columns
287
- // DataFrame {
288
- // schema: Arc::new(Schema::new(
289
- // filter_cols
290
- // .iter()
291
- // .map(|c| schema.field(c.0).clone())
292
- // .collect(),
293
- // )),
294
- // columns: filter_cols
295
- // .into_iter ()
296
- // .map(move |c| self.columns[c.0])
297
- // .collect(),
298
- // }
299
- // }
222
+ pub fn drop ( & self , col_names : Vec < & str > ) -> Self {
223
+ // get the names of columns from the schema, and match them with supplied
224
+ let mut col_num: i16 = -1 ;
225
+ let schema = self . schema . clone ( ) ;
226
+ let field_names: Vec < ( usize , & str ) > = schema
227
+ . fields ( )
228
+ . into_iter ( )
229
+ . map ( |c| {
230
+ col_num += 1 ;
231
+ ( col_num as usize , c. name ( ) . as_str ( ) )
232
+ } )
233
+ . collect ( ) ;
234
+
235
+ // filter names
236
+ let filter_cols: Vec < ( usize , & str ) > = {
237
+ // TODO follow the order of user-supplied column names
238
+ field_names
239
+ . into_iter ( )
240
+ . filter ( |( col, name) | !col_names. contains ( name) )
241
+ . collect ( )
242
+ } ;
243
+
244
+ // construct dataframe with selected columns
245
+ DataFrame {
246
+ schema : Arc :: new ( Schema :: new (
247
+ filter_cols
248
+ . iter ( )
249
+ . map ( |c| schema. field ( c. 0 ) . clone ( ) )
250
+ . collect ( ) ,
251
+ ) ) ,
252
+ columns : filter_cols
253
+ . iter ( )
254
+ . map ( move |c| self . columns [ c. 0 ] . clone ( ) )
255
+ . collect ( ) ,
256
+ }
257
+ }
300
258
301
259
/// Create a dataframe from an Arrow Table.
302
260
///
303
261
/// Arrow Tables are not yet in the Rust library, and we are hashing them out here
304
- // pub fn from_table(table: crate::table::Table) -> Self {
305
- // DataFrame {
306
- // schema: table.schema().clone(),
307
- // columns: * table.columns(),
308
- // }
309
- // }
262
+ pub fn from_table ( table : crate :: table:: Table ) -> Self {
263
+ DataFrame {
264
+ schema : table. schema ( ) . clone ( ) ,
265
+ columns : table. columns ( ) . to_vec ( ) ,
266
+ }
267
+ }
310
268
311
269
pub fn from_csv ( path : & str , schema : Option < Arc < Schema > > ) -> Self {
312
270
let file = File :: open ( path) . unwrap ( ) ;
0 commit comments