Skip to content

Commit b7cfb36

Browse files
authored
use yaml for table description instead yql (#11811)
1 parent 6f7ecd5 commit b7cfb36

File tree

44 files changed

+4892
-4535
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

44 files changed

+4892
-4535
lines changed

ydb/library/workload/benchmark_base/workload.cpp

Lines changed: 84 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
#include "workload.h"
22
#include <contrib/libs/fmt/include/fmt/format.h>
33
#include <ydb/public/api/protos/ydb_formats.pb.h>
4+
#include <ydb/library/yaml_json/yaml_to_json.h>
5+
#include <contrib/libs/yaml-cpp/include/yaml-cpp/node/parse.h>
46
#include <util/string/cast.h>
57
#include <util/system/spinlock.h>
68

@@ -33,52 +35,91 @@ const TString TWorkloadGeneratorBase::CsvFormatString = [] () {
3335
return settings.SerializeAsString();
3436
} ();
3537

38+
void TWorkloadGeneratorBase::GenerateDDLForTable(IOutputStream& result, const NJson::TJsonValue& table, bool single) const {
39+
auto specialTypes = GetSpecialDataTypes();
40+
specialTypes["string_type"] = Params.GetStringType();
41+
specialTypes["date_type"] = Params.GetDateType();
42+
specialTypes["timestamp_type"] = Params.GetTimestampType();
43+
44+
const auto& tableName = table["name"].GetString();
45+
const auto path = Params.GetFullTableName(single ? nullptr : tableName.c_str());
46+
result << Endl << "CREATE ";
47+
if (Params.GetStoreType() == TWorkloadBaseParams::EStoreType::ExternalS3) {
48+
result << "EXTERNAL ";
49+
}
50+
result << "TABLE `" << path << "` (" << Endl;
51+
TVector<TStringBuilder> columns;
52+
for (const auto& column: table["columns"].GetArray()) {
53+
const auto& columnName = column["name"].GetString();
54+
columns.emplace_back();
55+
auto& so = columns.back();
56+
so << " " << columnName << " ";
57+
const auto& type = column["type"].GetString();
58+
if (const auto* st = MapFindPtr(specialTypes, type)) {
59+
so << *st;
60+
} else {
61+
so << type;
62+
}
63+
if (column["not_null"].GetBooleanSafe(false) && Params.GetStoreType() != TWorkloadBaseParams::EStoreType::Row) {
64+
so << " NOT NULL";
65+
}
66+
}
67+
result << JoinSeq(",\n", columns);
68+
TVector<TStringBuf> keysV;
69+
for (const auto& k: table["primary_key"].GetArray()) {
70+
keysV.emplace_back(k.GetString());
71+
}
72+
const TString keys = JoinSeq(", ", keysV);
73+
if (Params.GetStoreType() == TWorkloadBaseParams::EStoreType::ExternalS3) {
74+
result << Endl;
75+
} else {
76+
result << "," << Endl << " PRIMARY KEY (" << keys << ")" << Endl;
77+
}
78+
result << ")" << Endl;
79+
80+
if (Params.GetStoreType() == TWorkloadBaseParams::EStoreType::Column) {
81+
result << "PARTITION BY HASH (" << keys << ")" << Endl;
82+
}
83+
84+
result << "WITH (" << Endl;
85+
if (Params.GetStoreType() == TWorkloadBaseParams::EStoreType::ExternalS3) {
86+
result << " DATA_SOURCE = \""+ Params.GetFullTableName(nullptr) + "_s3_external_source\", FORMAT = \"parquet\", LOCATION = \"" << Params.GetS3Prefix()
87+
<< "/" << (single ? TFsPath(Params.GetPath()).GetName() : (tableName + "/")) << "\"" << Endl;
88+
} else {
89+
if (Params.GetStoreType() == TWorkloadBaseParams::EStoreType::Column) {
90+
result << " STORE = COLUMN," << Endl;
91+
}
92+
result << " AUTO_PARTITIONING_MIN_PARTITIONS_COUNT = " << table["partitioning"].GetUIntegerSafe(64) << Endl;
93+
}
94+
result << ");" << Endl;
95+
}
96+
3697
std::string TWorkloadGeneratorBase::GetDDLQueries() const {
37-
TString storageType = "-- ";
38-
TString notNull = "";
39-
TString createExternalDataSource;
40-
TString external;
41-
TString partitioning = "AUTO_PARTITIONING_MIN_PARTITIONS_COUNT";
42-
TString primaryKey = ", PRIMARY KEY";
43-
TString partitionBy = "-- ";
44-
switch (Params.GetStoreType()) {
45-
case TWorkloadBaseParams::EStoreType::Column:
46-
storageType = "STORE = COLUMN, --";
47-
notNull = "NOT NULL";
48-
partitionBy = "PARTITION BY HASH";
49-
break;
50-
case TWorkloadBaseParams::EStoreType::ExternalS3: {
51-
TString dataSourceName = Params.GetFullTableName(nullptr) + "_s3_external_source";
52-
storageType = fmt::format(R"(DATA_SOURCE = "{}", FORMAT = "parquet", LOCATION = )", dataSourceName);
53-
notNull = "NOT NULL";
54-
createExternalDataSource = fmt::format(R"(
55-
CREATE EXTERNAL DATA SOURCE `{}` WITH (
56-
SOURCE_TYPE="ObjectStorage",
57-
LOCATION="{}",
58-
AUTH_METHOD="NONE"
59-
);
60-
)", dataSourceName, Params.GetS3Endpoint());
61-
external = "EXTERNAL";
62-
partitioning = "--";
63-
primaryKey = "--";
98+
const auto json = GetTablesJson();
99+
100+
TStringBuilder result;
101+
result << "--!syntax_v1" << Endl;
102+
if (Params.GetStoreType() == TWorkloadBaseParams::EStoreType::ExternalS3) {
103+
result << "CREATE EXTERNAL DATA SOURCE `" << Params.GetFullTableName(nullptr) << "_s3_external_source` WITH (" << Endl
104+
<< " SOURCE_TYPE=\"ObjectStorage\"," << Endl
105+
<< " LOCATION=\"" << Params.GetS3Endpoint() << "\"," << Endl
106+
<< " AUTH_METHOD=\"NONE\"" << Endl
107+
<< ");" << Endl;
64108
}
65-
case TWorkloadBaseParams::EStoreType::Row:
66-
break;
109+
110+
for (const auto& table: json["tables"].GetArray()) {
111+
GenerateDDLForTable(result.Out, table, false);
112+
}
113+
if (json.Has("table")) {
114+
GenerateDDLForTable(result.Out, json["table"], true);
67115
}
68-
auto createSql = DoGetDDLQueries();
69-
SubstGlobal(createSql, "{createExternal}", createExternalDataSource);
70-
SubstGlobal(createSql, "{external}", external);
71-
SubstGlobal(createSql, "{notnull}", notNull);
72-
SubstGlobal(createSql, "{partitioning}", partitioning);
73-
SubstGlobal(createSql, "{path}", Params.GetFullTableName(nullptr));
74-
SubstGlobal(createSql, "{primary_key}", primaryKey);
75-
SubstGlobal(createSql, "{s3_prefix}", Params.GetS3Prefix());
76-
SubstGlobal(createSql, "{store}", storageType);
77-
SubstGlobal(createSql, "{partition_by}", partitionBy);
78-
SubstGlobal(createSql, "{string_type}", Params.GetStringType());
79-
SubstGlobal(createSql, "{date_type}", Params.GetDateType());
80-
SubstGlobal(createSql, "{timestamp_type}", Params.GetTimestampType());
81-
return createSql.c_str();
116+
return result;
117+
}
118+
119+
NJson::TJsonValue TWorkloadGeneratorBase::GetTablesJson() const {
120+
const auto tablesYaml = GetTablesYaml();
121+
const auto yaml = YAML::Load(tablesYaml.c_str());
122+
return NKikimr::NYaml::Yaml2Json(yaml, true);
82123
}
83124

84125
TVector<std::string> TWorkloadGeneratorBase::GetCleanPaths() const {

ydb/library/workload/benchmark_base/workload.h

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
#include "state.h"
44
#include <ydb/library/workload/abstract/workload_query_generator.h>
55
#include <ydb/library/accessor/accessor.h>
6+
#include <library/cpp/json/json_value.h>
67
#include <util/generic/set.h>
78
#include <util/generic/deque.h>
89
#include <util/folder/path.h>
@@ -47,10 +48,14 @@ class TWorkloadGeneratorBase : public IWorkloadQueryGenerator {
4748
static const TString PsvFormatString;
4849

4950
protected:
50-
virtual TString DoGetDDLQueries() const = 0;
51+
using TSpecialDataTypes = TMap<TString, TString>;
52+
virtual TString GetTablesYaml() const = 0;
53+
virtual TSpecialDataTypes GetSpecialDataTypes() const = 0;
54+
NJson::TJsonValue GetTablesJson() const;
5155

5256
THolder<TGeneratorStateProcessor> StateProcessor;
5357
private:
58+
void GenerateDDLForTable(IOutputStream& result, const NJson::TJsonValue& table, bool single) const;
5459
const TWorkloadBaseParams& Params;
5560
};
5661

ydb/library/workload/benchmark_base/ya.make

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ SRCS(
1010
PEERDIR(
1111
ydb/library/accessor
1212
ydb/library/workload/abstract
13+
ydb/library/yaml_json
1314
ydb/public/api/protos
1415
)
1516

ydb/library/workload/clickbench/click_bench_schema.sql

Lines changed: 0 additions & 118 deletions
This file was deleted.

0 commit comments

Comments
 (0)