1
1
#include " filter.h"
2
+
3
+ #include < ydb/core/formats/arrow/serializer/native.h>
4
+
2
5
#include < ydb/library/actors/core/log.h>
3
6
4
7
namespace NKikimr ::NOlap {
@@ -14,43 +17,50 @@ NKikimr::NArrow::TColumnFilter TPKRangesFilter::BuildFilter(const arrow::Datum&
14
17
return result;
15
18
}
16
19
17
- bool TPKRangesFilter::Add (std::shared_ptr<NOlap::TPredicate> f, std::shared_ptr<NOlap::TPredicate> t, const TIndexInfo* indexInfo) {
20
+ TConclusionStatus TPKRangesFilter::Add (
21
+ std::shared_ptr<NOlap::TPredicate> f, std::shared_ptr<NOlap::TPredicate> t, const std::shared_ptr<arrow::Schema>& pkSchema) {
18
22
if ((!f || f->Empty ()) && (!t || t->Empty ())) {
19
- return true ;
23
+ return TConclusionStatus::Success ();
24
+ }
25
+ auto fromContainerConclusion = TPredicateContainer::BuildPredicateFrom (f, pkSchema);
26
+ if (fromContainerConclusion.IsFail ()) {
27
+ AFL_ERROR (NKikimrServices::TX_COLUMNSHARD_SCAN)(" event" , " add_range_filter" )(" problem" , " incorrect from container" )(
28
+ " from" , fromContainerConclusion.GetErrorMessage ());
29
+ return fromContainerConclusion;
20
30
}
21
- auto fromContainer = TPredicateContainer::BuildPredicateFrom (f, indexInfo );
22
- auto toContainer = TPredicateContainer::BuildPredicateTo (t, indexInfo);
23
- if (!fromContainer || !toContainer) {
24
- AFL_ERROR (NKikimrServices::TX_COLUMNSHARD_SCAN)( " event " , " add_range_filter " )( " problem " , " incorrect from/to containers " )( " from" , !!fromContainer)( " to " , !!toContainer );
25
- return false ;
31
+ auto toContainerConclusion = TPredicateContainer::BuildPredicateTo (t, pkSchema );
32
+ if (toContainerConclusion. IsFail ()) {
33
+ AFL_ERROR (NKikimrServices::TX_COLUMNSHARD_SCAN)( " event " , " add_range_filter " )( " problem " , " incorrect to container " )(
34
+ " from" , toContainerConclusion. GetErrorMessage () );
35
+ return toContainerConclusion ;
26
36
}
27
37
if (SortedRanges.size () && !FakeRanges) {
28
38
if (ReverseFlag) {
29
- if (fromContainer ->CrossRanges (SortedRanges.front ().GetPredicateTo ())) {
39
+ if (fromContainerConclusion ->CrossRanges (SortedRanges.front ().GetPredicateTo ())) {
30
40
AFL_ERROR (NKikimrServices::TX_COLUMNSHARD_SCAN)(" event" , " add_range_filter" )(" problem" , " not sorted sequence" );
31
- return false ;
41
+ return TConclusionStatus::Fail ( " not sorted sequence " ) ;
32
42
}
33
43
} else {
34
- if (fromContainer ->CrossRanges (SortedRanges.back ().GetPredicateTo ())) {
44
+ if (fromContainerConclusion ->CrossRanges (SortedRanges.back ().GetPredicateTo ())) {
35
45
AFL_ERROR (NKikimrServices::TX_COLUMNSHARD_SCAN)(" event" , " add_range_filter" )(" problem" , " not sorted sequence" );
36
- return false ;
46
+ return TConclusionStatus::Fail ( " not sorted sequence " ) ;
37
47
}
38
48
}
39
49
}
40
- auto pkRangeFilter = TPKRangeFilter::Build (std::move (*fromContainer ), std::move (*toContainer ));
41
- if (!pkRangeFilter ) {
42
- return false ;
50
+ auto pkRangeFilterConclusion = TPKRangeFilter::Build (fromContainerConclusion. DetachResult ( ), toContainerConclusion. DetachResult ( ));
51
+ if (pkRangeFilterConclusion. IsFail () ) {
52
+ return pkRangeFilterConclusion ;
43
53
}
44
54
if (FakeRanges) {
45
55
FakeRanges = false ;
46
56
SortedRanges.clear ();
47
57
}
48
58
if (ReverseFlag) {
49
- SortedRanges.emplace_front (std::move (*pkRangeFilter ));
59
+ SortedRanges.emplace_front (pkRangeFilterConclusion. DetachResult ( ));
50
60
} else {
51
- SortedRanges.emplace_back (std::move (*pkRangeFilter ));
61
+ SortedRanges.emplace_back (pkRangeFilterConclusion. DetachResult ( ));
52
62
}
53
- return true ;
63
+ return TConclusionStatus::Success () ;
54
64
}
55
65
56
66
TString TPKRangesFilter::DebugString () const {
@@ -84,6 +94,15 @@ bool TPKRangesFilter::IsPortionInUsage(const TPortionInfo& info) const {
84
94
return SortedRanges.empty ();
85
95
}
86
96
97
+ bool TPKRangesFilter::CheckPoint (const NArrow::TReplaceKey& point) const {
98
+ for (auto && i : SortedRanges) {
99
+ if (i.CheckPoint (point)) {
100
+ return true ;
101
+ }
102
+ }
103
+ return SortedRanges.empty ();
104
+ }
105
+
87
106
TPKRangeFilter::EUsageClass TPKRangesFilter::IsPortionInPartialUsage (const NArrow::TReplaceKey& start, const NArrow::TReplaceKey& end) const {
88
107
for (auto && i : SortedRanges) {
89
108
switch (i.IsPortionInPartialUsage (start, end)) {
@@ -99,11 +118,101 @@ TPKRangeFilter::EUsageClass TPKRangesFilter::IsPortionInPartialUsage(const NArro
99
118
}
100
119
101
120
// Creates a filter holding a single range built from the null "from" and null "to" predicates.
// NOTE(review): presumably this is the placeholder range that Add() drops while FakeRanges
// is set - confirm against the FakeRanges member initializer in the header.
TPKRangesFilter::TPKRangesFilter(const bool reverse)
    : ReverseFlag(reverse) {
    auto unboundedRange =
        TPKRangeFilter::Build(TPredicateContainer::BuildNullPredicateFrom(), TPredicateContainer::BuildNullPredicateTo());
    // Building a range from two null predicates is expected to always succeed.
    Y_ABORT_UNLESS(unboundedRange);
    SortedRanges.emplace_back(*unboundedRange);
}
108
126
127
+ std::shared_ptr<arrow::RecordBatch> TPKRangesFilter::SerializeToRecordBatch (const std::shared_ptr<arrow::Schema>& pkSchema) const {
128
+ auto fullSchema = NArrow::TStatusValidator::GetValid (
129
+ pkSchema->AddField (pkSchema->num_fields (), std::make_shared<arrow::Field>(" .ydb_operation_type" , arrow::uint32 ())));
130
+ auto builders = NArrow::MakeBuilders (fullSchema, SortedRanges.size () * 2 );
131
+ for (auto && i : SortedRanges) {
132
+ for (ui32 idx = 0 ; idx < (ui32)pkSchema->num_fields (); ++idx) {
133
+ if (idx < i.GetPredicateFrom ().GetReplaceKey ()->Size ()) {
134
+ AFL_VERIFY (NArrow::Append (
135
+ *builders[idx], i.GetPredicateFrom ().GetReplaceKey ()->Column (idx), i.GetPredicateFrom ().GetReplaceKey ()->GetPosition ()));
136
+ } else {
137
+ NArrow::TStatusValidator::Validate (builders[idx]->AppendNull ());
138
+ }
139
+ }
140
+ NArrow::Append<arrow::UInt32Type>(*builders[pkSchema->num_fields ()], (ui32)i.GetPredicateFrom ().GetCompareType ());
141
+
142
+ for (ui32 idx = 0 ; idx < (ui32)pkSchema->num_fields (); ++idx) {
143
+ if (idx < i.GetPredicateTo ().GetReplaceKey ()->Size ()) {
144
+ AFL_VERIFY (NArrow::Append (
145
+ *builders[idx], i.GetPredicateTo ().GetReplaceKey ()->Column (idx), i.GetPredicateTo ().GetReplaceKey ()->GetPosition ()));
146
+ } else {
147
+ NArrow::TStatusValidator::Validate (builders[idx]->AppendNull ());
148
+ }
149
+ }
150
+ NArrow::Append<arrow::UInt32Type>(*builders[pkSchema->num_fields ()], (ui32)i.GetPredicateTo ().GetCompareType ());
151
+ }
152
+ return arrow::RecordBatch::Make (fullSchema, SortedRanges.size () * 2 , NArrow::Finish (std::move (builders)));
109
153
}
154
+
155
+ std::shared_ptr<NKikimr::NOlap::TPKRangesFilter> TPKRangesFilter::BuildFromRecordBatchLines (
156
+ const std::shared_ptr<arrow::RecordBatch>& batch, const bool reverse) {
157
+ std::shared_ptr<TPKRangesFilter> result = std::make_shared<TPKRangesFilter>(reverse);
158
+ for (ui32 i = 0 ; i < batch->num_rows (); ++i) {
159
+ auto batchRow = batch->Slice (i, 1 );
160
+ auto pFrom = std::make_shared<NOlap::TPredicate>(NKernels::EOperation::GreaterEqual, batchRow);
161
+ auto pTo = std::make_shared<NOlap::TPredicate>(NKernels::EOperation::LessEqual, batchRow);
162
+ result->Add (pFrom, pTo, batch->schema ()).Validate ();
163
+ }
164
+ return result;
165
+ }
166
+
167
+ std::shared_ptr<NKikimr::NOlap::TPKRangesFilter> TPKRangesFilter::BuildFromRecordBatchFull (
168
+ const std::shared_ptr<arrow::RecordBatch>& batch, const std::shared_ptr<arrow::Schema>& pkSchema, const bool reverse) {
169
+ std::shared_ptr<TPKRangesFilter> result = std::make_shared<TPKRangesFilter>(reverse);
170
+ auto pkBatch = NArrow::TColumnOperator ().Adapt (batch, pkSchema).DetachResult ();
171
+ auto c = batch->GetColumnByName (" .ydb_operation_type" );
172
+ AFL_VERIFY (c);
173
+ AFL_VERIFY (c->type_id () == arrow::Type::UINT32);
174
+ auto cUi32 = static_pointer_cast<arrow::UInt32Array>(c);
175
+ for (ui32 i = 0 ; i < batch->num_rows ();) {
176
+ std::shared_ptr<NOlap::TPredicate> pFrom;
177
+ std::shared_ptr<NOlap::TPredicate> pTo;
178
+ {
179
+ auto batchRow = TPredicate::CutNulls (batch->Slice (i, 1 ));
180
+ NKernels::EOperation op = (NKernels::EOperation)cUi32->Value (i);
181
+ if (op == NKernels::EOperation::GreaterEqual || op == NKernels::EOperation::Greater) {
182
+ pFrom = std::make_shared<NOlap::TPredicate>(op, batchRow);
183
+ } else if (op == NKernels::EOperation::Equal) {
184
+ pFrom = std::make_shared<NOlap::TPredicate>(NKernels::EOperation::GreaterEqual, batchRow);
185
+ } else {
186
+ AFL_VERIFY (false );
187
+ }
188
+ if (op != NKernels::EOperation::Equal) {
189
+ ++i;
190
+ }
191
+ }
192
+ {
193
+ auto batchRow = TPredicate::CutNulls (batch->Slice (i, 1 ));
194
+ NKernels::EOperation op = (NKernels::EOperation)cUi32->Value (i);
195
+ if (op == NKernels::EOperation::LessEqual || op == NKernels::EOperation::Less) {
196
+ pTo = std::make_shared<NOlap::TPredicate>(op, batchRow);
197
+ } else if (op == NKernels::EOperation::Equal) {
198
+ pTo = std::make_shared<NOlap::TPredicate>(NKernels::EOperation::LessEqual, batchRow);
199
+ } else {
200
+ AFL_VERIFY (false );
201
+ }
202
+ }
203
+ result->Add (pFrom, pTo, pkSchema).Validate ();
204
+ }
205
+ return result;
206
+ }
207
+
208
+ std::shared_ptr<NKikimr::NOlap::TPKRangesFilter> TPKRangesFilter::BuildFromString (
209
+ const TString& data, const std::shared_ptr<arrow::Schema>& pkSchema, const bool reverse) {
210
+ auto batch = NArrow::TStatusValidator::GetValid (NArrow::NSerialization::TNativeSerializer ().Deserialize (data));
211
+ return BuildFromRecordBatchFull (batch, pkSchema, reverse);
212
+ }
213
+
214
+ TString TPKRangesFilter::SerializeToString (const std::shared_ptr<arrow::Schema>& pkSchema) const {
215
+ return NArrow::NSerialization::TNativeSerializer ().SerializeFull (SerializeToRecordBatch (pkSchema));
216
+ }
217
+
218
+ } // namespace NKikimr::NOlap
0 commit comments