@@ -934,25 +934,37 @@ std::vector<std::shared_ptr<arrow::RecordBatch>> SliceToRecordBatches(const std:
934
934
}
935
935
std::sort (positions.begin (), positions.end ());
936
936
positions.erase (std::unique (positions.begin (), positions.end ()), positions.end ());
937
-
937
+ AFL_VERIFY (positions. size () > 1 )( " size " , positions. size ())( " positions " , JoinSeq ( " , " , positions));
938
938
std::vector<std::vector<std::shared_ptr<arrow::Array>>> slicedData;
939
939
slicedData.resize (positions.size () - 1 );
940
- {
941
- for (auto && i : t->columns ()) {
942
- for (ui32 idx = 0 ; idx + 1 < positions.size (); ++idx) {
943
- auto slice = i->Slice (positions[idx], positions[idx + 1 ] - positions[idx]);
944
- AFL_VERIFY (slice->num_chunks () == 1 );
945
- slicedData[idx].emplace_back (slice->chunks ().front ());
940
+ for (auto && i : t->columns ()) {
941
+ ui32 currentPosition = 0 ;
942
+ auto it = i->chunks ().begin ();
943
+ ui32 length = (*it)->length ();
944
+ for (ui32 idx = 0 ; idx + 1 < positions.size (); ++idx) {
945
+ auto chunk = (*it)->Slice (positions[idx] - currentPosition, positions[idx + 1 ] - positions[idx]);
946
+ AFL_VERIFY_DEBUG (chunk->length () == positions[idx + 1 ] - positions[idx])(" length" , chunk->length ())(
947
+ " delta" , positions[idx + 1 ] - positions[idx]);
948
+ AFL_VERIFY_DEBUG (chunk->length ())(" delta" , positions[idx + 1 ] - positions[idx]);
949
+ if (positions[idx + 1 ] - currentPosition == length) {
950
+ if (++it != i->chunks ().end ()) {
951
+ length = (*it)->length ();
952
+ }
953
+ currentPosition = positions[idx + 1 ];
946
954
}
955
+ slicedData[idx].emplace_back (chunk);
947
956
}
948
957
}
949
958
std::vector<std::shared_ptr<arrow::RecordBatch>> result;
950
959
ui32 count = 0 ;
951
960
for (auto && i : slicedData) {
961
+ AFL_VERIFY_DEBUG (i.size ());
962
+ AFL_VERIFY_DEBUG (i.front ()->length ());
952
963
result.emplace_back (arrow::RecordBatch::Make (t->schema (), i.front ()->length (), i));
953
964
count += result.back ()->num_rows ();
954
965
}
955
- AFL_VERIFY (count == t->num_rows ())(" count" , count)(" t" , t->num_rows ());
966
+ AFL_VERIFY (count == t->num_rows ())(" count" , count)(" t" , t->num_rows ())(" sd_size" , slicedData.size ())(" columns" , t->num_columns ())(
967
+ " schema" , t->schema ()->ToString ());
956
968
return result;
957
969
}
958
970
0 commit comments