Skip to content

Commit 16444c9

Browse files
optimize sub columns iterations (#15271)
1 parent 3a5cfed commit 16444c9

File tree

3 files changed

+89
-70
lines changed

3 files changed

+89
-70
lines changed

ydb/core/formats/arrow/accessor/sub_columns/columns_storage.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,7 @@ class TColumnsData {
4949
private:
5050
ui32 KeyIndex;
5151
std::shared_ptr<IChunkedArray> GlobalChunkedArray;
52-
std::shared_ptr<arrow::StringArray> CurrentArrayData;
52+
// Non-owning pointer to the string array of the current chunk. It is assigned from
// ChunkAddress->GetArray().get(), so it presumably stays valid only while ChunkAddress keeps
// that array alive -- confirm before using it after ChunkAddress is reassigned.
// Initialized to nullptr so the member is never indeterminate before the first assignment.
const arrow::StringArray* CurrentArrayData = nullptr;
5353
std::optional<IChunkedArray::TFullChunkedArrayAddress> FullArrayAddress;
5454
std::optional<IChunkedArray::TFullDataAddress> ChunkAddress;
5555
ui32 CurrentIndex = 0;
@@ -63,7 +63,7 @@ class TColumnsData {
6363
const ui32 localIndex = FullArrayAddress->GetAddress().GetLocalIndex(CurrentIndex);
6464
ChunkAddress = FullArrayAddress->GetArray()->GetChunk(ChunkAddress, localIndex);
6565
AFL_VERIFY(ChunkAddress->GetArray()->type()->id() == arrow::utf8()->id());
66-
CurrentArrayData = std::static_pointer_cast<arrow::StringArray>(ChunkAddress->GetArray());
66+
CurrentArrayData = static_cast<const arrow::StringArray*>(ChunkAddress->GetArray().get());
6767
if (FullArrayAddress->GetArray()->GetType() == IChunkedArray::EType::Array) {
6868
if (CurrentArrayData->IsNull(localIndex)) {
6969
Next();

ydb/core/formats/arrow/accessor/sub_columns/iterators.h

Lines changed: 86 additions & 68 deletions
Original file line numberDiff line numberDiff line change
@@ -9,105 +9,123 @@ class TGeneralIterator {
99
std::variant<TColumnsData::TIterator, TOthersData::TIterator> Iterator;
1010
std::optional<ui32> RemappedKey;
1111
std::vector<ui32> RemapKeys;
12+
ui32 RecordIndex = 0;
13+
ui32 KeyIndex = 0;
14+
bool IsValidFlag = false;
15+
bool HasValueFlag = false;
16+
std::string_view Value;
17+
bool IsColumnKeyFlag = false;
18+
19+
// Refreshes the cached snapshot from a TColumnsData iterator and marks this object valid.
// Visible callers only invoke it after the iterator reported a valid position.
void InitFromIterator(const TColumnsData::TIterator& iterator) {
    RecordIndex = iterator.GetCurrentRecordIndex();
    // RemappedKey, when present, overrides the key index reported by the iterator.
    const ui32 ownKeyIndex = iterator.GetKeyIndex();
    KeyIndex = RemappedKey.has_value() ? *RemappedKey : ownKeyIndex;
    IsValidFlag = true;
    HasValueFlag = iterator.HasValue();
    Value = iterator.GetValue();
}
26+
27+
// Refreshes the cached snapshot from a TOthersData iterator and marks this object valid.
// Visible callers only invoke it after the iterator reported a valid position.
void InitFromIterator(const TOthersData::TIterator& iterator) {
    RecordIndex = iterator.GetRecordIndex();
    // A non-empty RemapKeys table translates the iterator's key index; the index is not
    // range-checked here, so the table is assumed to cover every key the iterator can report.
    const ui32 ownKeyIndex = iterator.GetKeyIndex();
    KeyIndex = RemapKeys.empty() ? ownKeyIndex : RemapKeys[ownKeyIndex];
    IsValidFlag = true;
    HasValueFlag = iterator.HasValue();
    Value = iterator.GetValue();
}
34+
35+
bool Initialize() {
36+
struct TVisitor {
37+
private:
38+
TGeneralIterator& Owner;
39+
public:
40+
TVisitor(TGeneralIterator& owner)
41+
: Owner(owner) {
42+
}
43+
bool operator()(TOthersData::TIterator& iterator) {
44+
Owner.IsColumnKeyFlag = false;
45+
if (iterator.IsValid()) {
46+
Owner.InitFromIterator(iterator);
47+
} else {
48+
Owner.IsValidFlag = false;
49+
}
50+
return Owner.IsValidFlag;
51+
}
52+
bool operator()(TColumnsData::TIterator& iterator) {
53+
Owner.IsColumnKeyFlag = true;
54+
if (iterator.IsValid()) {
55+
Owner.InitFromIterator(iterator);
56+
} else {
57+
Owner.IsValidFlag = false;
58+
}
59+
return Owner.IsValidFlag;
60+
}
61+
};
62+
return std::visit(TVisitor(*this), Iterator);
63+
}
1264

1365
public:
1466
// Wraps a TColumnsData iterator.
// iterator    - taken by rvalue reference and moved into the variant; the named parameter
//               is an lvalue, so without std::move it would be copied.
// remappedKey - optional key index to report instead of the iterator's own key index.
// The cached snapshot is filled right away by Initialize().
TGeneralIterator(TColumnsData::TIterator&& iterator, const std::optional<ui32> remappedKey = {})
    : Iterator(std::move(iterator))
    , RemappedKey(remappedKey) {
    Initialize();
}
1871
// Wraps a TOthersData iterator.
// iterator  - taken by rvalue reference and moved into the variant; the named parameter
//             is an lvalue, so without std::move it would be copied.
// remapKeys - translation table for key indexes (copied); empty means no translation.
// The cached snapshot is filled right away by Initialize().
TGeneralIterator(TOthersData::TIterator&& iterator, const std::vector<ui32>& remapKeys = {})
    : Iterator(std::move(iterator))
    , RemapKeys(remapKeys) {
    Initialize();
}
2276
bool IsColumnKey() const {
23-
struct TVisitor {
24-
bool operator()(const TOthersData::TIterator& /*iterator*/) {
25-
return false;
26-
}
27-
bool operator()(const TColumnsData::TIterator& /*iterator*/) {
28-
return true;
29-
}
30-
};
31-
TVisitor visitor;
32-
return std::visit(visitor, Iterator);
77+
return IsColumnKeyFlag;
3378
}
3479
bool Next() {
3580
struct TVisitor {
81+
private:
82+
TGeneralIterator& Owner;
83+
public:
84+
TVisitor(TGeneralIterator& owner)
85+
: Owner(owner)
86+
{
87+
88+
}
3689
bool operator()(TOthersData::TIterator& iterator) {
37-
return iterator.Next();
90+
if (iterator.Next()) {
91+
Owner.InitFromIterator(iterator);
92+
} else {
93+
Owner.IsValidFlag = false;
94+
}
95+
return Owner.IsValidFlag;
3896
}
3997
bool operator()(TColumnsData::TIterator& iterator) {
40-
return iterator.Next();
98+
if (iterator.Next()) {
99+
Owner.InitFromIterator(iterator);
100+
} else {
101+
Owner.IsValidFlag = false;
102+
}
103+
return Owner.IsValidFlag;
41104
}
42105
};
43-
return std::visit(TVisitor(), Iterator);
106+
return std::visit(TVisitor(*this), Iterator);
44107
}
45108
bool IsValid() const {
46-
struct TVisitor {
47-
bool operator()(const TOthersData::TIterator& iterator) {
48-
return iterator.IsValid();
49-
}
50-
bool operator()(const TColumnsData::TIterator& iterator) {
51-
return iterator.IsValid();
52-
}
53-
};
54-
return std::visit(TVisitor(), Iterator);
109+
return IsValidFlag;
55110
}
56111
// Returns the cached record index of the current position.
// Requires a valid position (checked with AFL_VERIFY).
ui32 GetRecordIndex() const {
    AFL_VERIFY(IsValidFlag);
    return RecordIndex;
}
67115
// Returns the cached key index of the current position, with any remapping
// (RemappedKey / RemapKeys) already applied by InitFromIterator().
// Requires a valid position (checked with AFL_VERIFY).
ui32 GetKeyIndex() const {
    AFL_VERIFY(IsValidFlag);
    return KeyIndex;
}
85119
std::string_view GetValue() const {
86-
struct TVisitor {
87-
std::string_view operator()(const TOthersData::TIterator& iterator) {
88-
return iterator.GetValue();
89-
}
90-
std::string_view operator()(const TColumnsData::TIterator& iterator) {
91-
return iterator.GetValue();
92-
}
93-
};
94-
return std::visit(TVisitor(), Iterator);
120+
AFL_VERIFY(IsValidFlag);
121+
return Value;
95122
}
96-
97123
bool HasValue() const {
98-
struct TVisitor {
99-
bool operator()(const TOthersData::TIterator& iterator) {
100-
return iterator.HasValue();
101-
}
102-
bool operator()(const TColumnsData::TIterator& iterator) {
103-
return iterator.HasValue();
104-
}
105-
};
106-
return std::visit(TVisitor(), Iterator);
124+
AFL_VERIFY(IsValidFlag);
125+
return HasValueFlag;
107126
}
108-
109127
// Inverted ordering by (RecordIndex, KeyIndex): *this compares "less" than item when
// item's pair is the smaller one. Presumably intended for max-heap style containers so
// that the smallest position comes out first -- confirm against the callers.
// Reads the cached fields directly, without the validity check the getters perform.
bool operator<(const TGeneralIterator& item) const {
    if (item.RecordIndex != RecordIndex) {
        return item.RecordIndex < RecordIndex;
    }
    return item.KeyIndex < KeyIndex;
}
112130
};
113131

ydb/core/tx/columnshard/engines/reader/common_reader/iterator/sub_columns_fetching.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -191,6 +191,7 @@ class TSubColumnsFetchLogic: public IKernelFetchLogic {
191191
AFL_VERIFY(!!StorageId);
192192
TBlobsAction blobsAction(Source->GetContext()->GetCommonContext()->GetStoragesManager(), NBlobOperations::EConsumer::SCAN);
193193
auto reading = blobsAction.GetReading(*StorageId);
194+
reading->SetIsBackgroundProcess(false);
194195
for (auto&& i : ColumnChunks) {
195196
if (!!i.GetHeaderRange()) {
196197
const TString readBlob = blobs.Extract(*StorageId, *i.GetHeaderRange());

0 commit comments

Comments
 (0)