@@ -10,7 +10,6 @@ pub struct ChunkedArray {
10
10
chunks : Vec < Arc < Array > > ,
11
11
num_rows : usize ,
12
12
null_count : usize ,
13
- // TODO: Go has data_type, is it worth storing, or getting from the first chunk?
14
13
}
15
14
16
15
impl ChunkedArray {
@@ -40,7 +39,7 @@ impl ChunkedArray {
40
39
self . num_rows
41
40
}
42
41
43
- fn null_count ( & self ) -> usize {
42
+ pub fn null_count ( & self ) -> usize {
44
43
self . null_count
45
44
}
46
45
@@ -63,29 +62,24 @@ impl ChunkedArray {
63
62
/// The `offset` is the position of the first element in the constructed slice.
64
63
/// `length` is the length of the slice. If there are not enough elements in the chunked array,
65
64
/// the length will be adjusted accordingly.
66
- ///
67
- /// TODO: I've made length optional because CPP has 2 `slice` methods, with one being a slice
68
- /// to the end of the array.
69
- ///
70
- /// TODO: This relies on my version of slice, which I'm still implementing.
71
- // fn slice(&self, offset: usize, length: Option<usize>) -> Self {
72
- // // unimplemented!("TODO: I need help here, this has to be a zero-copy slice among slices")
73
- // let mut offset = offset;
74
- // let mut length = length.unwrap_or(std::usize::MAX);
75
- // let mut current_chunk: usize = 0;
76
- // let mut new_chunks: Vec<ArrayRef> = vec![];
77
- // while current_chunk < self.num_chunks() && offset >= self.chunk(current_chunk).len() {
78
- // offset -= self.chunk(current_chunk).len();
79
- // current_chunk += 1;
80
- // }
81
- // while current_chunk < self.num_chunks() && length > 0 {
82
- // new_chunks.push(self.chunk(current_chunk).slice(offset, length));
83
- // length -= self.chunk(current_chunk).len() - offset;
84
- // offset = 0;
85
- // current_chunk += 1;
86
- // }
87
- // Self::from_arrays(new_chunks)
88
- // }
65
+ fn slice ( & self , offset : usize , length : Option < usize > ) -> Self {
66
+ let mut offset = offset;
67
+ let mut length = length. unwrap_or ( std:: usize:: MAX ) ;
68
+ let mut current_chunk: usize = 0 ;
69
+ let mut new_chunks: Vec < ArrayRef > = vec ! [ ] ;
70
+ // compute the first offset. If offset > whole chunks' lengths, skip those chunks
71
+ while current_chunk < self . num_chunks ( ) && offset >= self . chunk ( current_chunk) . len ( ) {
72
+ offset -= self . chunk ( current_chunk) . len ( ) ;
73
+ current_chunk += 1 ;
74
+ }
75
+ while current_chunk < self . num_chunks ( ) && length > 0 {
76
+ new_chunks. push ( self . chunk ( current_chunk) . slice ( offset, length) ) ;
77
+ length -= self . chunk ( current_chunk) . len ( ) - offset;
78
+ offset = 0 ;
79
+ current_chunk += 1 ;
80
+ }
81
+ Self :: from_arrays ( new_chunks)
82
+ }
89
83
90
84
fn flatten ( & self ) {
91
85
unimplemented ! ( "This is for flattening struct columns, we aren't yet there" )
@@ -165,8 +159,17 @@ impl Column {
165
159
& self . field
166
160
}
167
161
168
- /// TODO: slice seems the same as that of `ChunkedArray`
169
- // fn slice(&self, offset: usize, length: usize) -> Self {}
162
+ pub fn slice ( & self , offset : usize , length : Option < usize > ) -> Self {
163
+ Self :: from_chunked_array ( self . data ( ) . slice ( offset, length) , self . field ( ) . clone ( ) )
164
+ }
165
+
166
+ pub fn null_count ( & self ) -> usize {
167
+ self . data ( ) . null_count ( )
168
+ }
169
+
170
+ pub fn num_rows ( & self ) -> usize {
171
+ self . data ( ) . num_rows ( )
172
+ }
170
173
171
174
/// Placeholder: takes no arguments and currently does nothing.
fn flatten() {}
172
175
}
@@ -178,23 +181,23 @@ pub struct Table {
178
181
}
179
182
180
183
impl Table {
181
- // pub fn new(schema: Arc<Schema>, columns: Vec<Column>) -> Self {
182
- // // assert that there are some columns
183
- // assert!(
184
- // columns.len() > 0,
185
- // "at least one column must be defined to create a record batch"
186
- // );
187
- // // assert that all columns have the same row count
188
- // let len = columns[0].data().len ();
189
- // for i in 1..columns.len() {
190
- // assert_eq!(
191
- // len,
192
- // columns[i].len (),
193
- // "all columns in a record batch must have the same length"
194
- // );
195
- // }
196
- // Table { schema, columns }
197
- // }
184
+ pub fn new ( schema : Arc < Schema > , columns : Vec < Column > ) -> Self {
185
+ // assert that there are some columns
186
+ assert ! (
187
+ columns. len( ) > 0 ,
188
+ "at least one column must be defined to create a record batch"
189
+ ) ;
190
+ // assert that all columns have the same row count
191
+ let len = columns[ 0 ] . data ( ) . num_rows ( ) ;
192
+ for i in 1 ..columns. len ( ) {
193
+ assert_eq ! (
194
+ len,
195
+ columns[ i] . data ( ) . num_rows ( ) ,
196
+ "all columns in a record batch must have the same length"
197
+ ) ;
198
+ }
199
+ Table { schema, columns }
200
+ }
198
201
199
202
pub fn schema ( & self ) -> & Arc < Schema > {
200
203
& self . schema
@@ -251,6 +254,19 @@ impl Table {
251
254
Table { schema, columns }
252
255
}
253
256
257
+ /// Slice the table from an offset
258
+ pub fn slice ( & self , offset : usize , limit : usize ) -> Self {
259
+ Table {
260
+ schema : self . schema . clone ( ) ,
261
+ columns : self
262
+ . columns
263
+ . clone ( )
264
+ . into_iter ( )
265
+ . map ( |col| col. slice ( offset, Some ( limit) ) )
266
+ . collect ( ) ,
267
+ }
268
+ }
269
+
254
270
/// Construct a `Table` from a sequence of Arrow `RecordBatch`es.
255
271
///
256
272
/// Columns are first created from the `RecordBatch`es, with schema validations being performed.
0 commit comments