Skip to content

Commit 9016403

Browse files
ivanmorozov333 and ivanmorozov333 authored
compaction correct settings (#19244)
Co-authored-by: ivanmorozov333 <imorozov333@ya.ru>
1 parent 2df3e61 commit 9016403

File tree

20 files changed

+182
-108
lines changed

20 files changed

+182
-108
lines changed

ydb/core/kqp/ut/olap/kqp_olap_ut.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3278,7 +3278,7 @@ Y_UNIT_TEST_SUITE(KqpOlap) {
32783278
auto alterQuery =
32793279
R"(ALTER OBJECT `/Root/olapStore` (TYPE TABLESTORE) SET (ACTION=UPSERT_OPTIONS, `COMPACTION_PLANNER.CLASS_NAME`=`lc-buckets`, `COMPACTION_PLANNER.FEATURES`=`
32803280
{"levels" : [{"class_name" : "Zero", "expected_blobs_size" : 1, "portions_count_available" : 3},
3281-
{"class_name" : "Zero"}]}`);
3281+
{"class_name" : "Zero", "expected_blobs_size" : 1}]}`);
32823282
)";
32833283
auto result = session.ExecuteQuery(alterQuery, NQuery::TTxControl::NoTx()).GetValueSync();
32843284
UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), NYdb::EStatus::SUCCESS, result.GetIssues().ToOneLineString());

ydb/core/tx/columnshard/common/portion.h

Lines changed: 26 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,12 @@
11
#pragma once
2-
#include <util/system/types.h>
2+
#include <ydb/core/formats/arrow/rows/view.h>
3+
34
#include <util/generic/string.h>
5+
#include <util/system/types.h>
46

57
namespace NKikimr::NOlap::NPortion {
68
// NOTE: These values are persisted in LocalDB so they must be stable
7-
enum EProduced: ui32 {
9+
enum EProduced : ui32 {
810
UNSPECIFIED = 0,
911
INSERTED,
1012
COMPACTED,
@@ -25,4 +27,25 @@ class TSpecialColumns {
2527
static constexpr const ui32 SPEC_COL_DELETE_FLAG_INDEX = SPEC_COL_PLAN_STEP_INDEX + 3;
2628
};
2729

28-
}
30+
// Small value object carrying the portion properties that compaction-level
// selection reads: the total blob size and the first/last primary-key rows.
// Both TPortionInfo and TPortionAccessorConstructor build it via GetCompactionInfo().
class TPortionInfoForCompaction {
31+
private:
32+
// Total blob size in bytes. Callers read it through GetTotalBlobBytes(),
// presumably generated by the YDB_READONLY macro (its definition is not visible here).
YDB_READONLY(ui64, TotalBlobBytes, 0);
33+
// First primary-key row of the portion (stored as a copy).
const NArrow::TSimpleRow FirstPK;
34+
// Last primary-key row of the portion (stored as a copy).
const NArrow::TSimpleRow LastPK;
35+
36+
public:
37+
// Copies the supplied size and key rows into the object; all members are fixed after construction.
TPortionInfoForCompaction(const ui64 totalBlobBytes, const NArrow::TSimpleRow& firstPK, const NArrow::TSimpleRow& lastPK)
38+
: TotalBlobBytes(totalBlobBytes)
39+
, FirstPK(firstPK)
40+
, LastPK(lastPK) {
41+
}
42+
43+
// Returns the stored first primary-key row by const reference.
const NArrow::TSimpleRow& GetFirstPK() const {
44+
return FirstPK;
45+
}
46+
// Returns the stored last primary-key row by const reference.
const NArrow::TSimpleRow& GetLastPK() const {
47+
return LastPK;
48+
}
49+
};
50+
51+
} // namespace NKikimr::NOlap::NPortion

ydb/core/tx/columnshard/engines/changes/compaction.cpp

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -25,8 +25,9 @@ void TCompactColumnEngineChanges::DoCompile(TFinalizationContext& context) {
2525

2626
for (auto& portionInfo : AppendedPortions) {
2727
auto& constructor = portionInfo.GetPortionConstructor().MutablePortionConstructor();
28-
constructor.MutableMeta().SetCompactionLevel(GranuleMeta->GetOptimizerPlanner().GetAppropriateLevel(
29-
GetPortionsToMove().GetTargetCompactionLevel().value_or(0), portionInfo.GetPortionConstructor()));
28+
constructor.MutableMeta().SetCompactionLevel(
29+
GranuleMeta->GetOptimizerPlanner().GetAppropriateLevel(GetPortionsToMove().GetTargetCompactionLevel().value_or(0),
30+
portionInfo.GetPortionConstructor().GetCompactionInfo()));
3031
}
3132
}
3233

ydb/core/tx/columnshard/engines/portions/constructor_accessor.cpp

Lines changed: 4 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -38,20 +38,12 @@ TPortionDataAccessor TPortionAccessorConstructor::Build(const bool needChunksNor
3838

3939
AFL_VERIFY(Records.size());
4040

41-
PortionInfo->MetaConstructor.ColumnRawBytes = 0;
42-
PortionInfo->MetaConstructor.ColumnBlobBytes = 0;
43-
PortionInfo->MetaConstructor.IndexRawBytes = 0;
44-
PortionInfo->MetaConstructor.IndexBlobBytes = 0;
41+
PortionInfo->MetaConstructor.ColumnRawBytes = GetColumnRawBytes();
42+
PortionInfo->MetaConstructor.ColumnBlobBytes = GetColumnBlobBytes();
43+
PortionInfo->MetaConstructor.IndexRawBytes = GetIndexRawBytes();
44+
PortionInfo->MetaConstructor.IndexBlobBytes = GetIndexBlobBytes();
4545

4646
PortionInfo->MetaConstructor.RecordsCount = CalcRecordsCount();
47-
for (auto&& r : Records) {
48-
*PortionInfo->MetaConstructor.ColumnRawBytes += r.GetMeta().GetRawBytes();
49-
*PortionInfo->MetaConstructor.ColumnBlobBytes += r.GetBlobRange().GetSize();
50-
}
51-
for (auto&& r : Indexes) {
52-
*PortionInfo->MetaConstructor.IndexRawBytes += r.GetRawBytes();
53-
*PortionInfo->MetaConstructor.IndexBlobBytes += r.GetDataSize();
54-
}
5547

5648
std::shared_ptr<TPortionInfo> result = PortionInfo->Build();
5749

ydb/core/tx/columnshard/engines/portions/constructor_accessor.h

Lines changed: 35 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -145,15 +145,46 @@ class TPortionAccessorConstructor {
145145
}
146146

147147
// Total blob size of the portion under construction: column blob bytes plus index blob bytes.
// Note: GetColumnBlobBytes() verifies that Records is non-empty, so this call does too.
ui64 GetTotalBlobsSize() const {
148+
return GetColumnBlobBytes() + GetIndexBlobBytes();
149+
}
150+
151+
// Builds the compaction summary for the portion under construction from its
// total blob size and the first/last keys held by the meta constructor.
// GetFirstAndLastPK() verifies that the keys have been set before they are read.
NPortion::TPortionInfoForCompaction GetCompactionInfo() const {
152+
return NPortion::TPortionInfoForCompaction(
153+
GetTotalBlobsSize(), PortionInfo->GetMeta().GetFirstAndLastPK().GetFirst(), PortionInfo->GetMeta().GetFirstAndLastPK().GetLast());
154+
}
155+
156+
// Sums the blob-range sizes of all column records.
// Requires at least one record (checked by AFL_VERIFY).
ui64 GetColumnBlobBytes() const {
157+
AFL_VERIFY(Records.size());
158+
ui64 result = 0;
159+
for (auto&& r : Records) {
160+
result += r.GetBlobRange().GetSize();
161+
}
162+
return result;
163+
}
164+
165+
ui64 GetColumnRawBytes() const {
148166
AFL_VERIFY(Records.size());
149-
ui64 size = 0;
167+
ui64 result = 0;
150168
for (auto&& r : Records) {
151-
size += r.GetBlobRange().GetSize();
169+
result += r.GetMeta().GetRawBytes();
152170
}
171+
return result;
172+
}
173+
174+
ui64 GetIndexBlobBytes() const {
175+
ui64 result = 0;
153176
for (auto&& r : Indexes) {
154-
size += r.GetDataSize();
177+
result += r.GetDataSize();
155178
}
156-
return size;
179+
return result;
180+
}
181+
182+
// Sums GetRawBytes() over all index chunks; returns 0 when there are no indexes.
ui64 GetIndexRawBytes() const {
183+
ui64 result = 0;
184+
for (auto&& r : Indexes) {
185+
result += r.GetRawBytes();
186+
}
187+
return result;
157188
}
158189

159190
static TPortionAccessorConstructor BuildForRewriteBlobs(const TPortionInfo& portion) {

ydb/core/tx/columnshard/engines/portions/constructor_meta.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,11 @@ class TPortionMetaConstructor: public TPortionMetaBase {
3636
TPortionMetaConstructor() = default;
3737
TPortionMetaConstructor(const TPortionMeta& meta, const bool withBlobs);
3838

39+
// Returns the first/last primary-key holder of the portion being constructed.
// AFL_VERIFY guards that FirstAndLastPK has been set before it is dereferenced.
const NArrow::TFirstLastSpecialKeys& GetFirstAndLastPK() const {
40+
AFL_VERIFY(FirstAndLastPK);
41+
return *FirstAndLastPK;
42+
}
43+
3944
ui64 GetTotalBlobBytes() const {
4045
AFL_VERIFY(ColumnBlobBytes);
4146
AFL_VERIFY(IndexBlobBytes);

ydb/core/tx/columnshard/engines/portions/portion_info.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -116,6 +116,9 @@ class TPortionInfo {
116116
public:
117117
virtual EPortionType GetPortionType() const = 0;
118118
virtual bool IsCommitted() const = 0;
119+
// Builds the compaction summary of an existing portion: total blob bytes plus
// the index key range boundaries (IndexKeyStart / IndexKeyEnd) taken from the meta.
NPortion::TPortionInfoForCompaction GetCompactionInfo() const {
120+
return NPortion::TPortionInfoForCompaction(GetTotalBlobBytes(), GetMeta().IndexKeyStart(), GetMeta().IndexKeyEnd());
121+
}
119122

120123
ui64 GetMemorySize() const {
121124
return sizeof(TPortionInfo) + Meta.GetMemorySize() - sizeof(TPortionMeta);

ydb/core/tx/columnshard/engines/storage/optimizer/abstract/optimizer.h

Lines changed: 15 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,14 @@
11
#pragma once
22
#include <ydb/core/base/appdata.h>
33
#include <ydb/core/formats/arrow/reader/position.h>
4+
#include <ydb/core/tx/columnshard/common/path_id.h>
5+
#include <ydb/core/tx/columnshard/common/portion.h>
46

57
#include <ydb/library/conclusion/result.h>
68
#include <ydb/services/bg_tasks/abstract/interface.h>
79

810
#include <contrib/libs/apache/arrow/cpp/src/arrow/type.h>
911
#include <library/cpp/object_factory/object_factory.h>
10-
#include <ydb/core/tx/columnshard/common/path_id.h>
1112

1213
namespace NKikimr::NOlap {
1314
class TColumnEngineChanges;
@@ -80,6 +81,8 @@ class TTaskDescription {
8081
}
8182
};
8283

84+
using TPortionInfoForCompaction = NPortion::TPortionInfoForCompaction;
85+
8386
class IOptimizerPlanner {
8487
private:
8588
const TInternalPathId PathId;
@@ -88,9 +91,10 @@ class IOptimizerPlanner {
8891
virtual bool DoIsOverloaded() const {
8992
return false;
9093
}
94+
9195
protected:
92-
virtual void DoModifyPortions(const THashMap<ui64, std::shared_ptr<TPortionInfo>>& add,
93-
const THashMap<ui64, std::shared_ptr<TPortionInfo>>& remove) = 0;
96+
virtual void DoModifyPortions(
97+
const THashMap<ui64, std::shared_ptr<TPortionInfo>>& add, const THashMap<ui64, std::shared_ptr<TPortionInfo>>& remove) = 0;
9498
virtual std::shared_ptr<TColumnEngineChanges> DoGetOptimizationTask(
9599
std::shared_ptr<TGranuleMeta> granule, const std::shared_ptr<NDataLocks::TManager>& dataLocksManager) const = 0;
96100
virtual TOptimizationPriority DoGetUsefulMetric() const = 0;
@@ -108,7 +112,7 @@ class IOptimizerPlanner {
108112
}
109113

110114
public:
111-
virtual ui32 GetAppropriateLevel(const ui32 baseLevel, const TPortionAccessorConstructor& /*info*/) const {
115+
// Chooses the compaction level for a portion described by `info`.
// This default ignores the portion summary and returns `baseLevel` unchanged;
// planners may override it.
virtual ui32 GetAppropriateLevel(const ui32 baseLevel, const TPortionInfoForCompaction& /*info*/) const {
112116
return baseLevel;
113117
}
114118

@@ -159,8 +163,7 @@ class IOptimizerPlanner {
159163
return DoSerializeToJsonVisual();
160164
}
161165

162-
void ModifyPortions(const THashMap<ui64, std::shared_ptr<TPortionInfo>>& add,
163-
const THashMap<ui64, std::shared_ptr<TPortionInfo>>& remove) {
166+
// Public entry point for updating the planner's portion set.
// Installs a log-context guard tagged with this planner's path_id for the duration
// of the call, then delegates to the implementation-specific DoModifyPortions().
void ModifyPortions(const THashMap<ui64, std::shared_ptr<TPortionInfo>>& add, const THashMap<ui64, std::shared_ptr<TPortionInfo>>& remove) {
164167
NActors::TLogContextGuard g(NActors::TLogContextBuilder::Build(NKikimrServices::TX_COLUMNSHARD)("path_id", PathId));
165168
DoModifyPortions(add, remove);
166169
}
@@ -179,23 +182,24 @@ class IOptimizerPlanner {
179182
class IOptimizerPlannerConstructor {
180183
public:
181184
enum class EOptimizerStrategy {
182-
Default, //use One Layer levels to avoid portion intersections
183-
Logs, // use Zero Levels only for performance
185+
Default, //use One Layer levels to avoid portion intersections
186+
Logs, // use Zero Levels only for performance
184187
LogsInStore
185188
};
186189
class TBuildContext {
187190
private:
188191
YDB_READONLY_DEF(TInternalPathId, PathId);
189192
YDB_READONLY_DEF(std::shared_ptr<IStoragesManager>, Storages);
190193
YDB_READONLY_DEF(std::shared_ptr<arrow::Schema>, PKSchema);
191-
YDB_READONLY_DEF(EOptimizerStrategy, DefaultStrategy);
194+
YDB_READONLY_DEF(EOptimizerStrategy, DefaultStrategy);
192195

193196
public:
194-
TBuildContext(const TInternalPathId pathId, const std::shared_ptr<IStoragesManager>& storages, const std::shared_ptr<arrow::Schema>& pkSchema)
197+
TBuildContext(
198+
const TInternalPathId pathId, const std::shared_ptr<IStoragesManager>& storages, const std::shared_ptr<arrow::Schema>& pkSchema)
195199
: PathId(pathId)
196200
, Storages(storages)
197201
, PKSchema(pkSchema)
198-
, DefaultStrategy(EOptimizerStrategy::Default) { //TODO configure me via DDL
202+
, DefaultStrategy(EOptimizerStrategy::Default) { //TODO configure me via DDL
199203
}
200204
};
201205

ydb/core/tx/columnshard/engines/storage/optimizer/lcbuckets/planner/level/abstract.h

Lines changed: 4 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -351,11 +351,13 @@ class IPortionsLevel {
351351
mutable std::optional<TInstant> PredOptimization = TInstant::Now();
352352

353353
public:
354-
virtual bool IsAppropriatePortionToMove(const TPortionAccessorConstructor& /*info*/) const {
354+
virtual ui64 GetExpectedPortionSize() const = 0;
355+
356+
// Whether a portion with the given compaction summary may be moved to this level.
// Base-class default: never (the summary is ignored); levels override as needed.
virtual bool IsAppropriatePortionToMove(const TPortionInfoForCompaction& /*info*/) const {
355357
return false;
356358
}
357359

358-
virtual bool IsAppropriatePortionToStore(const TPortionAccessorConstructor& /*info*/) const {
360+
// Whether a portion with the given compaction summary may be stored in this level.
// Base-class default: never (the summary is ignored); levels override as needed.
virtual bool IsAppropriatePortionToStore(const TPortionInfoForCompaction& /*info*/) const {
359361
return false;
360362
}
361363

@@ -413,14 +415,6 @@ class IPortionsLevel {
413415
AFL_VERIFY(DefaultPortionsSelector);
414416
}
415417

416-
bool CanTakePortion(const TPortionInfo::TConstPtr& portion) const {
417-
auto chain = GetAffectedPortions(portion->IndexKeyStart(), portion->IndexKeyEnd());
418-
if (chain && chain->GetPortions().size()) {
419-
return false;
420-
}
421-
return true;
422-
}
423-
424418
virtual bool IsLocked(const std::shared_ptr<NDataLocks::TManager>& locksManager) const = 0;
425419

426420
virtual TTaskDescription GetTaskDescription() const {

ydb/core/tx/columnshard/engines/storage/optimizer/lcbuckets/planner/level/common_level.h

Lines changed: 23 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,10 @@ class TOneLayerPortions: public IPortionsLevel {
1515
const bool StrictOneLayer = true;
1616
std::shared_ptr<TSimplePortionsGroupInfo> SummaryPortionsInfo;
1717

18+
// Implements the IPortionsLevel interface: returns this level's configured ExpectedPortionSize.
virtual ui64 GetExpectedPortionSize() const override {
19+
return ExpectedPortionSize;
20+
}
21+
1822
ui64 GetLevelBlobBytesLimit() const {
1923
return std::max<ui64>(SizeLimitGuarantee, SummaryPortionsInfo->GetBlobBytes() * BytesLimitFraction);
2024
}
@@ -59,12 +63,27 @@ class TOneLayerPortions: public IPortionsLevel {
5963
if (!GetNextLevel()) {
6064
return 0;
6165
}
62-
if ((ui64)GetPortionsInfo().GetBlobBytes() > GetLevelBlobBytesLimit() && GetPortionsInfo().GetCount() >= 2 &&
63-
(ui64)GetPortionsInfo().GetBlobBytes() > ExpectedPortionSize * 2) {
64-
return ((ui64)GetLevelId() << 48) + GetPortionsInfo().GetBlobBytes() - GetLevelBlobBytesLimit();
65-
} else {
66+
if ((ui64)GetPortionsInfo().GetBlobBytes() < GetLevelBlobBytesLimit()) {
67+
return 0;
68+
}
69+
if (GetPortionsInfo().GetCount() < 2) {
70+
return 0;
71+
}
72+
if ((ui64)GetPortionsInfo().GetBlobBytes() < std::max(GetNextLevel()->GetExpectedPortionSize(), GetExpectedPortionSize())) {
6673
return 0;
6774
}
75+
return ((ui64)GetLevelId() << 48) + GetPortionsInfo().GetBlobBytes() - GetLevelBlobBytesLimit();
76+
}
77+
78+
// A portion may be stored in this level only if both conditions hold:
//  1. its total blob size is at least this level's expected portion size, and
//  2. GetAffectedPortionBytes() for its [first PK, last PK] range is zero
//     (presumably meaning no portion already in the level intersects that range;
//     the helper is defined outside this view, so confirm).
virtual bool IsAppropriatePortionToStore(const TPortionInfoForCompaction& info) const override {
79+
if (info.GetTotalBlobBytes() < GetExpectedPortionSize()) {
80+
return false;
81+
}
82+
return !GetAffectedPortionBytes(info.GetFirstPK(), info.GetLastPK());
83+
}
84+
85+
// This level accepts any portion as a move target; the portion summary is not inspected.
virtual bool IsAppropriatePortionToMove(const TPortionInfoForCompaction& /*info*/) const override {
86+
return true;
6887
}
6988

7089
virtual TInstant DoGetWeightExpirationInstant() const override {
@@ -85,10 +104,6 @@ class TOneLayerPortions: public IPortionsLevel {
85104
{
86105
}
87106

88-
ui64 GetExpectedPortionSize() const {
89-
return ExpectedPortionSize;
90-
}
91-
92107
virtual bool IsLocked(const std::shared_ptr<NDataLocks::TManager>& locksManager) const override {
93108
for (auto&& i : Portions) {
94109
if (locksManager->IsLocked(*i.GetPortion(), NDataLocks::ELockCategory::Compaction)) {

0 commit comments

Comments
 (0)