Skip to content

Commit 45513a5

Browse files
author
Vladislav Gogov
authored
Column Family for ColumnTable (#9657)
1 parent 4f2597b commit 45513a5

File tree

34 files changed

+2531
-299
lines changed

34 files changed

+2531
-299
lines changed

ydb/core/formats/arrow/serializer/native.cpp

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -113,9 +113,8 @@ NKikimr::TConclusion<std::shared_ptr<arrow::util::Codec>> TNativeSerializer::Bui
113113
const int levelMin = codec->minimum_compression_level();
114114
const int levelMax = codec->maximum_compression_level();
115115
if (levelDef < levelMin || levelMax < levelDef) {
116-
return TConclusionStatus::Fail(
117-
TStringBuilder() << "incorrect level for codec. have to be: [" << levelMin << ":" << levelMax << "]"
118-
);
116+
return TConclusionStatus::Fail(TStringBuilder() << "incorrect level for codec `" << arrow::util::Codec::GetCodecAsString(cType)
117+
<< "`. have to be: [" << levelMin << ":" << levelMax << "]");
119118
}
120119
std::shared_ptr<arrow::util::Codec> codecPtr = std::move(NArrow::TStatusValidator::GetValid(arrow::util::Codec::Create(cType, levelDef)));
121120
return codecPtr;
@@ -182,7 +181,9 @@ NKikimr::TConclusionStatus TNativeSerializer::DoDeserializeFromProto(const NKiki
182181
void TNativeSerializer::DoSerializeToProto(NKikimrSchemeOp::TOlapColumn::TSerializer& proto) const {
183182
if (Options.codec) {
184183
proto.MutableArrowCompression()->SetCodec(NArrow::CompressionToProto(Options.codec->compression_type()));
185-
proto.MutableArrowCompression()->SetLevel(Options.codec->compression_level());
184+
if (arrow::util::Codec::SupportsCompressionLevel(Options.codec->compression_type())) {
185+
proto.MutableArrowCompression()->SetLevel(Options.codec->compression_level());
186+
}
186187
} else {
187188
proto.MutableArrowCompression()->SetCodec(NArrow::CompressionToProto(arrow::Compression::UNCOMPRESSED));
188189
}

ydb/core/formats/arrow/serializer/native.h

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -103,6 +103,20 @@ class TNativeSerializer: public ISerializer {
103103
Options.use_threads = false;
104104
Options.memory_pool = pool;
105105
}
106+
107+
// Returns the compression type of the codec stored in Options, or
// UNCOMPRESSED when no codec is configured.
arrow::Compression::type GetCodecType() const {
    return Options.codec ? Options.codec->compression_type() : arrow::Compression::type::UNCOMPRESSED;
}
113+
114+
std::optional<i32> GetCodecLevel() const {
115+
if (Options.codec && arrow::util::Codec::SupportsCompressionLevel(Options.codec->compression_type())) {
116+
return Options.codec->compression_level();
117+
}
118+
return {};
119+
}
106120
};
107121

108122
}

ydb/core/formats/arrow/serializer/parsing.cpp

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,18 @@ std::string CompressionToString(const arrow::Compression::type compression) {
77
return arrow::util::Codec::GetCodecAsString(compression);
88
}
99

10+
// Maps a scheme-level column codec to its textual name:
// ColumnCodecPlain -> "off", ColumnCodecZSTD -> "zstd", ColumnCodecLZ4 -> "lz4".
// Any other value produces an empty string.
std::string CompressionToString(const NKikimrSchemeOp::EColumnCodec compression) {
    if (compression == NKikimrSchemeOp::EColumnCodec::ColumnCodecPlain) {
        return "off";
    }
    if (compression == NKikimrSchemeOp::EColumnCodec::ColumnCodecZSTD) {
        return "zstd";
    }
    if (compression == NKikimrSchemeOp::EColumnCodec::ColumnCodecLZ4) {
        return "lz4";
    }
    return "";
}
21+
1022
std::optional<arrow::Compression::type> CompressionFromString(const std::string& compressionStr) {
1123
auto result = arrow::util::Codec::GetCompressionType(compressionStr);
1224
if (!result.ok()) {

ydb/core/formats/arrow/serializer/parsing.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,9 +9,9 @@
99
namespace NKikimr::NArrow {
1010

1111
std::string CompressionToString(const arrow::Compression::type compression);
12+
std::string CompressionToString(const NKikimrSchemeOp::EColumnCodec compression);
1213
std::optional<arrow::Compression::type> CompressionFromString(const std::string& compressionStr);
1314

1415
NKikimrSchemeOp::EColumnCodec CompressionToProto(const arrow::Compression::type compression);
1516
std::optional<arrow::Compression::type> CompressionFromProto(const NKikimrSchemeOp::EColumnCodec compression);
16-
1717
}
Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
#include "parsing.h"
2+
#include "utils.h"
3+
4+
#include <ydb/library/formats/arrow/validation/validation.h>
5+
6+
#include <contrib/libs/apache/arrow/cpp/src/arrow/util/compression.h>
7+
8+
namespace NKikimr::NArrow {
9+
// Reports whether the given Arrow compression type accepts a compression
// level, as answered by arrow::util::Codec::SupportsCompressionLevel.
bool SupportsCompressionLevel(const arrow::Compression::type compression) {
    const bool hasLevel = arrow::util::Codec::SupportsCompressionLevel(compression);
    return hasLevel;
}
12+
13+
// Reports whether the scheme-level column codec accepts a compression level.
// The codec is first converted with CompressionFromProto; if that conversion
// yields no Arrow compression type, the answer is false.
bool SupportsCompressionLevel(const NKikimrSchemeOp::EColumnCodec compression) {
    const auto arrowType = CompressionFromProto(compression);
    // Check the optional explicitly: calling value() on an empty optional
    // would throw std::bad_optional_access out of a plain predicate.
    return arrowType.has_value() && SupportsCompressionLevel(*arrowType);
}
16+
17+
// Returns the minimum compression level reported by Arrow for the given
// compression type (the Arrow result is passed through
// TStatusValidator::GetValid), or an empty optional when the type does not
// support compression levels.
std::optional<int> MinimumCompressionLevel(const arrow::Compression::type compression) {
    if (SupportsCompressionLevel(compression)) {
        return NArrow::TStatusValidator::GetValid(arrow::util::Codec::MinimumCompressionLevel(compression));
    }
    return std::nullopt;
}
23+
// Returns the maximum compression level reported by Arrow for the given
// compression type (the Arrow result is passed through
// TStatusValidator::GetValid), or an empty optional when the type does not
// support compression levels.
std::optional<int> MaximumCompressionLevel(const arrow::Compression::type compression) {
    if (SupportsCompressionLevel(compression)) {
        return NArrow::TStatusValidator::GetValid(arrow::util::Codec::MaximumCompressionLevel(compression));
    }
    return std::nullopt;
}
29+
}
Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
#pragma once
2+
3+
#include <ydb/core/protos/flat_scheme_op.pb.h>
4+
5+
#include <contrib/libs/apache/arrow/cpp/src/arrow/util/type_fwd.h>
6+
#include <util/system/yassert.h>
7+
8+
#include <optional>
9+
10+
namespace NKikimr::NArrow {
11+
bool SupportsCompressionLevel(const arrow::Compression::type compression);
12+
bool SupportsCompressionLevel(const NKikimrSchemeOp::EColumnCodec compression);
13+
14+
std::optional<int> MinimumCompressionLevel(const arrow::Compression::type compression);
15+
std::optional<int> MaximumCompressionLevel(const arrow::Compression::type compression);
16+
}

ydb/core/formats/arrow/serializer/ya.make

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ SRCS(
1313
GLOBAL native.cpp
1414
stream.cpp
1515
parsing.cpp
16+
utils.cpp
1617
)
1718

1819
END()

ydb/core/kqp/host/kqp_gateway_proxy.cpp

Lines changed: 94 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -132,6 +132,8 @@ bool ConvertCreateTableSettingsToProto(NYql::TKikimrTableMetadataPtr metadata, Y
132132
familyProto->set_compression(Ydb::Table::ColumnFamily::COMPRESSION_NONE);
133133
} else if (to_lower(family.Compression.GetRef()) == "lz4") {
134134
familyProto->set_compression(Ydb::Table::ColumnFamily::COMPRESSION_LZ4);
135+
} else if (to_lower(family.Compression.GetRef()) == "zstd") {
136+
familyProto->set_compression(Ydb::Table::ColumnFamily::COMPRESSION_ZSTD);
135137
} else {
136138
code = Ydb::StatusIds::BAD_REQUEST;
137139
error = TStringBuilder() << "Unknown compression '" << family.Compression.GetRef() << "' for a column family";
@@ -383,9 +385,59 @@ bool FillCreateTableDesc(NYql::TKikimrTableMetadataPtr metadata, NKikimrSchemeOp
383385
}
384386

385387
template <typename T>
386-
void FillColumnTableSchema(NKikimrSchemeOp::TColumnTableSchema& schema, const T& metadata)
387-
{
388+
bool FillColumnTableSchema(NKikimrSchemeOp::TColumnTableSchema& schema, const T& metadata, Ydb::StatusIds::StatusCode& code, TString& error) {
388389
Y_ENSURE(metadata.ColumnOrder.size() == metadata.Columns.size());
390+
391+
THashMap<TString, ui32> columnFamiliesByName;
392+
ui32 columnFamilyId = 1;
393+
for (const auto& family : metadata.ColumnFamilies) {
394+
if (family.Data.Defined()) {
395+
code = Ydb::StatusIds::BAD_REQUEST;
396+
error = TStringBuilder() << "Field `DATA` is not supported for OLAP tables in column family '" << family.Name << "'";
397+
return false;
398+
}
399+
auto columnFamilyIt = columnFamiliesByName.find(family.Name);
400+
if (!columnFamilyIt.IsEnd()) {
401+
code = Ydb::StatusIds::BAD_REQUEST;
402+
error = TStringBuilder() << "Duplicate column family `" << family.Name << '`';
403+
return false;
404+
}
405+
auto familyDescription = schema.AddColumnFamilies();
406+
familyDescription->SetName(family.Name);
407+
if (familyDescription->GetName() == "default") {
408+
familyDescription->SetId(0);
409+
} else {
410+
familyDescription->SetId(columnFamilyId++);
411+
}
412+
Y_ENSURE(columnFamiliesByName.emplace(familyDescription->GetName(), familyDescription->GetId()).second);
413+
if (family.Compression.Defined()) {
414+
NKikimrSchemeOp::EColumnCodec codec;
415+
auto codecName = to_lower(family.Compression.GetRef());
416+
if (codecName == "off") {
417+
codec = NKikimrSchemeOp::EColumnCodec::ColumnCodecPlain;
418+
} else if (codecName == "zstd") {
419+
codec = NKikimrSchemeOp::EColumnCodec::ColumnCodecZSTD;
420+
} else if (codecName == "lz4") {
421+
codec = NKikimrSchemeOp::EColumnCodec::ColumnCodecLZ4;
422+
} else {
423+
code = Ydb::StatusIds::BAD_REQUEST;
424+
error = TStringBuilder() << "Unknown compression '" << family.Compression.GetRef() << "' for a column family";
425+
return false;
426+
}
427+
familyDescription->SetColumnCodec(codec);
428+
} else {
429+
code = Ydb::StatusIds::BAD_REQUEST;
430+
error = TStringBuilder() << "Compression is not set for column family'" << family.Name << "'";
431+
return false;
432+
}
433+
434+
if (family.CompressionLevel.Defined()) {
435+
familyDescription->SetColumnCodecLevel(family.CompressionLevel.GetRef());
436+
}
437+
}
438+
439+
schema.SetNextColumnFamilyId(columnFamilyId);
440+
389441
for (const auto& name : metadata.ColumnOrder) {
390442
auto columnIt = metadata.Columns.find(name);
391443
Y_ENSURE(columnIt != metadata.Columns.end());
@@ -399,11 +451,29 @@ void FillColumnTableSchema(NKikimrSchemeOp::TColumnTableSchema& schema, const T&
399451
if (columnType.TypeInfo) {
400452
*columnDesc.MutableTypeInfo() = *columnType.TypeInfo;
401453
}
454+
455+
if (!columnFamiliesByName.empty()) {
456+
TString columnFamilyName = "default";
457+
ui32 columnFamilyId = 0;
458+
if (columnIt->second.Families.size()) {
459+
columnFamilyName = *columnIt->second.Families.begin();
460+
auto columnFamilyIdIt = columnFamiliesByName.find(columnFamilyName);
461+
if (columnFamilyIdIt.IsEnd()) {
462+
code = Ydb::StatusIds::BAD_REQUEST;
463+
error = TStringBuilder() << "Unknown column family `" << columnFamilyName << "` for column `" << columnDesc.GetName() << "`";
464+
return false;
465+
}
466+
columnFamilyId = columnFamilyIdIt->second;
467+
}
468+
columnDesc.SetColumnFamilyName(columnFamilyName);
469+
columnDesc.SetColumnFamilyId(columnFamilyId);
470+
}
402471
}
403472

404473
for (const auto& keyColumn : metadata.KeyColumnNames) {
405474
schema.AddKeyColumnNames(keyColumn);
406475
}
476+
return true;
407477
}
408478

409479
bool FillCreateColumnTableDesc(NYql::TKikimrTableMetadataPtr metadata,
@@ -1705,7 +1775,12 @@ class TKqpGatewayProxy : public IKikimrGateway {
17051775
NKikimrSchemeOp::TColumnTableDescription* tableDesc = schemeTx.MutableCreateColumnTable();
17061776

17071777
tableDesc->SetName(pathPair.second);
1708-
FillColumnTableSchema(*tableDesc->MutableSchema(), *metadata);
1778+
if (!FillColumnTableSchema(*tableDesc->MutableSchema(), *metadata, code, error)) {
1779+
IKqpGateway::TGenericResult errResult;
1780+
errResult.AddIssue(NYql::TIssue(error));
1781+
errResult.SetStatus(NYql::YqlStatusFromYdbStatus(code));
1782+
return MakeFuture(std::move(errResult));
1783+
}
17091784

17101785
if (!FillCreateColumnTableDesc(metadata, *tableDesc, code, error)) {
17111786
IKqpGateway::TGenericResult errResult;
@@ -2016,7 +2091,22 @@ class TKqpGatewayProxy : public IKikimrGateway {
20162091

20172092
NKikimrSchemeOp::TColumnTableSchemaPreset* schemaPreset = storeDesc->AddSchemaPresets();
20182093
schemaPreset->SetName("default");
2019-
FillColumnTableSchema(*schemaPreset->MutableSchema(), settings);
2094+
2095+
if (!settings.ColumnFamilies.empty()) {
2096+
IKqpGateway::TGenericResult errResult;
2097+
errResult.AddIssue(NYql::TIssue("TableStore does not support column families"));
2098+
errResult.SetStatus(NYql::YqlStatusFromYdbStatus(Ydb::StatusIds::BAD_REQUEST));
2099+
return MakeFuture(std::move(errResult));
2100+
}
2101+
2102+
Ydb::StatusIds::StatusCode code;
2103+
TString error;
2104+
if (!FillColumnTableSchema(*schemaPreset->MutableSchema(), settings, code, error)) {
2105+
IKqpGateway::TGenericResult errResult;
2106+
errResult.AddIssue(NYql::TIssue(error));
2107+
errResult.SetStatus(NYql::YqlStatusFromYdbStatus(code));
2108+
return MakeFuture(std::move(errResult));
2109+
}
20202110

20212111
if (IsPrepare()) {
20222112
auto& phyQuery = *SessionCtx->Query().PreparingQuery->MutablePhysicalQuery();

ydb/core/kqp/provider/yql_kikimr_exec.cpp

Lines changed: 14 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -182,7 +182,8 @@ namespace {
182182
return dropGroupSettings;
183183
}
184184

185-
TCreateTableStoreSettings ParseCreateTableStoreSettings(TKiCreateTable create, const TTableSettings& settings) {
185+
TCreateTableStoreSettings ParseCreateTableStoreSettings(
186+
TKiCreateTable create, const TTableSettings& settings, const TVector<TColumnFamily>& columnFamilies) {
186187
TCreateTableStoreSettings out;
187188
out.TableStore = TString(create.Table());
188189
out.ShardsCount = settings.MinPartitions ? *settings.MinPartitions : 0;
@@ -215,6 +216,13 @@ namespace {
215216
columnMeta.NotNull = notNull;
216217
}
217218

219+
if (columnTuple.Size() > 3) {
220+
auto families = columnTuple.Item(3).Cast<TCoAtomList>();
221+
for (auto family : families) {
222+
columnMeta.Families.push_back(TString(family.Value()));
223+
}
224+
}
225+
218226
out.ColumnOrder.push_back(columnName);
219227
out.Columns.insert(std::make_pair(columnName, columnMeta));
220228
}
@@ -224,6 +232,7 @@ namespace {
224232
out.Indexes.push_back(indexDesc);
225233
}
226234
#endif
235+
out.ColumnFamilies = columnFamilies;
227236
return out;
228237
}
229238

@@ -1250,8 +1259,8 @@ class TKiSinkCallableExecutionTransformer : public TAsyncCallbackTransformer<TKi
12501259
TStringBuilder() << "TABLESTORE with not COLUMN store"));
12511260
return SyncError();
12521261
}
1253-
future = Gateway->CreateTableStore(cluster,
1254-
ParseCreateTableStoreSettings(maybeCreate.Cast(), table.Metadata->TableSettings), existingOk);
1262+
future = Gateway->CreateTableStore(cluster, ParseCreateTableStoreSettings(maybeCreate.Cast(), table.Metadata->TableSettings,
1263+
table.Metadata->ColumnFamilies), existingOk);
12551264
break;
12561265
}
12571266
case ETableType::Table:
@@ -1569,6 +1578,8 @@ class TKiSinkCallableExecutionTransformer : public TAsyncCallbackTransformer<TKi
15691578
f->set_compression(Ydb::Table::ColumnFamily::COMPRESSION_NONE);
15701579
} else if (to_lower(comp) == "lz4") {
15711580
f->set_compression(Ydb::Table::ColumnFamily::COMPRESSION_LZ4);
1581+
} else if (to_lower(comp) == "zstd") {
1582+
f->set_compression(Ydb::Table::ColumnFamily::COMPRESSION_ZSTD);
15721583
} else {
15731584
auto errText = TStringBuilder() << "Unknown compression '" << comp
15741585
<< "' for a column family";

ydb/core/kqp/provider/yql_kikimr_gateway.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -699,6 +699,7 @@ struct TCreateTableStoreSettings {
699699
TVector<TString> KeyColumnNames;
700700
TVector<TString> ColumnOrder;
701701
TVector<TIndexDescription> Indexes;
702+
TVector<TColumnFamily> ColumnFamilies;
702703
};
703704

704705
struct TAlterTableStoreSettings {

0 commit comments

Comments
 (0)