|
1 | 1 | #include "workload.h"
|
2 | 2 | #include <contrib/libs/fmt/include/fmt/format.h>
|
3 | 3 | #include <ydb/public/api/protos/ydb_formats.pb.h>
|
| 4 | +#include <ydb/library/yaml_json/yaml_to_json.h> |
| 5 | +#include <contrib/libs/yaml-cpp/include/yaml-cpp/node/parse.h> |
4 | 6 | #include <util/string/cast.h>
|
5 | 7 | #include <util/system/spinlock.h>
|
6 | 8 |
|
@@ -33,52 +35,91 @@ const TString TWorkloadGeneratorBase::CsvFormatString = [] () {
|
33 | 35 | return settings.SerializeAsString();
|
34 | 36 | } ();
|
35 | 37 |
|
| 38 | +void TWorkloadGeneratorBase::GenerateDDLForTable(IOutputStream& result, const NJson::TJsonValue& table, bool single) const { |
| 39 | + auto specialTypes = GetSpecialDataTypes(); |
| 40 | + specialTypes["string_type"] = Params.GetStringType(); |
| 41 | + specialTypes["date_type"] = Params.GetDateType(); |
| 42 | + specialTypes["timestamp_type"] = Params.GetTimestampType(); |
| 43 | + |
| 44 | + const auto& tableName = table["name"].GetString(); |
| 45 | + const auto path = Params.GetFullTableName(single ? nullptr : tableName.c_str()); |
| 46 | + result << Endl << "CREATE "; |
| 47 | + if (Params.GetStoreType() == TWorkloadBaseParams::EStoreType::ExternalS3) { |
| 48 | + result << "EXTERNAL "; |
| 49 | + } |
| 50 | + result << "TABLE `" << path << "` (" << Endl; |
| 51 | + TVector<TStringBuilder> columns; |
| 52 | + for (const auto& column: table["columns"].GetArray()) { |
| 53 | + const auto& columnName = column["name"].GetString(); |
| 54 | + columns.emplace_back(); |
| 55 | + auto& so = columns.back(); |
| 56 | + so << " " << columnName << " "; |
| 57 | + const auto& type = column["type"].GetString(); |
| 58 | + if (const auto* st = MapFindPtr(specialTypes, type)) { |
| 59 | + so << *st; |
| 60 | + } else { |
| 61 | + so << type; |
| 62 | + } |
| 63 | + if (column["not_null"].GetBooleanSafe(false) && Params.GetStoreType() != TWorkloadBaseParams::EStoreType::Row) { |
| 64 | + so << " NOT NULL"; |
| 65 | + } |
| 66 | + } |
| 67 | + result << JoinSeq(",\n", columns); |
| 68 | + TVector<TStringBuf> keysV; |
| 69 | + for (const auto& k: table["primary_key"].GetArray()) { |
| 70 | + keysV.emplace_back(k.GetString()); |
| 71 | + } |
| 72 | + const TString keys = JoinSeq(", ", keysV); |
| 73 | + if (Params.GetStoreType() == TWorkloadBaseParams::EStoreType::ExternalS3) { |
| 74 | + result << Endl; |
| 75 | + } else { |
| 76 | + result << "," << Endl << " PRIMARY KEY (" << keys << ")" << Endl; |
| 77 | + } |
| 78 | + result << ")" << Endl; |
| 79 | + |
| 80 | + if (Params.GetStoreType() == TWorkloadBaseParams::EStoreType::Column) { |
| 81 | + result << "PARTITION BY HASH (" << keys << ")" << Endl; |
| 82 | + } |
| 83 | + |
| 84 | + result << "WITH (" << Endl; |
| 85 | + if (Params.GetStoreType() == TWorkloadBaseParams::EStoreType::ExternalS3) { |
| 86 | + result << " DATA_SOURCE = \""+ Params.GetFullTableName(nullptr) + "_s3_external_source\", FORMAT = \"parquet\", LOCATION = \"" << Params.GetS3Prefix() |
| 87 | + << "/" << (single ? TFsPath(Params.GetPath()).GetName() : (tableName + "/")) << "\"" << Endl; |
| 88 | + } else { |
| 89 | + if (Params.GetStoreType() == TWorkloadBaseParams::EStoreType::Column) { |
| 90 | + result << " STORE = COLUMN," << Endl; |
| 91 | + } |
| 92 | + result << " AUTO_PARTITIONING_MIN_PARTITIONS_COUNT = " << table["partitioning"].GetUIntegerSafe(64) << Endl; |
| 93 | + } |
| 94 | + result << ");" << Endl; |
| 95 | +} |
| 96 | + |
36 | 97 | std::string TWorkloadGeneratorBase::GetDDLQueries() const {
|
37 |
| - TString storageType = "-- "; |
38 |
| - TString notNull = ""; |
39 |
| - TString createExternalDataSource; |
40 |
| - TString external; |
41 |
| - TString partitioning = "AUTO_PARTITIONING_MIN_PARTITIONS_COUNT"; |
42 |
| - TString primaryKey = ", PRIMARY KEY"; |
43 |
| - TString partitionBy = "-- "; |
44 |
| - switch (Params.GetStoreType()) { |
45 |
| - case TWorkloadBaseParams::EStoreType::Column: |
46 |
| - storageType = "STORE = COLUMN, --"; |
47 |
| - notNull = "NOT NULL"; |
48 |
| - partitionBy = "PARTITION BY HASH"; |
49 |
| - break; |
50 |
| - case TWorkloadBaseParams::EStoreType::ExternalS3: { |
51 |
| - TString dataSourceName = Params.GetFullTableName(nullptr) + "_s3_external_source"; |
52 |
| - storageType = fmt::format(R"(DATA_SOURCE = "{}", FORMAT = "parquet", LOCATION = )", dataSourceName); |
53 |
| - notNull = "NOT NULL"; |
54 |
| - createExternalDataSource = fmt::format(R"( |
55 |
| - CREATE EXTERNAL DATA SOURCE `{}` WITH ( |
56 |
| - SOURCE_TYPE="ObjectStorage", |
57 |
| - LOCATION="{}", |
58 |
| - AUTH_METHOD="NONE" |
59 |
| - ); |
60 |
| - )", dataSourceName, Params.GetS3Endpoint()); |
61 |
| - external = "EXTERNAL"; |
62 |
| - partitioning = "--"; |
63 |
| - primaryKey = "--"; |
| 98 | + const auto json = GetTablesJson(); |
| 99 | + |
| 100 | + TStringBuilder result; |
| 101 | + result << "--!syntax_v1" << Endl; |
| 102 | + if (Params.GetStoreType() == TWorkloadBaseParams::EStoreType::ExternalS3) { |
| 103 | + result << "CREATE EXTERNAL DATA SOURCE `" << Params.GetFullTableName(nullptr) << "_s3_external_source` WITH (" << Endl |
| 104 | + << " SOURCE_TYPE=\"ObjectStorage\"," << Endl |
| 105 | + << " LOCATION=\"" << Params.GetS3Endpoint() << "\"," << Endl |
| 106 | + << " AUTH_METHOD=\"NONE\"" << Endl |
| 107 | + << ");" << Endl; |
64 | 108 | }
|
65 |
| - case TWorkloadBaseParams::EStoreType::Row: |
66 |
| - break; |
| 109 | + |
| 110 | + for (const auto& table: json["tables"].GetArray()) { |
| 111 | + GenerateDDLForTable(result.Out, table, false); |
| 112 | + } |
| 113 | + if (json.Has("table")) { |
| 114 | + GenerateDDLForTable(result.Out, json["table"], true); |
67 | 115 | }
|
68 |
| - auto createSql = DoGetDDLQueries(); |
69 |
| - SubstGlobal(createSql, "{createExternal}", createExternalDataSource); |
70 |
| - SubstGlobal(createSql, "{external}", external); |
71 |
| - SubstGlobal(createSql, "{notnull}", notNull); |
72 |
| - SubstGlobal(createSql, "{partitioning}", partitioning); |
73 |
| - SubstGlobal(createSql, "{path}", Params.GetFullTableName(nullptr)); |
74 |
| - SubstGlobal(createSql, "{primary_key}", primaryKey); |
75 |
| - SubstGlobal(createSql, "{s3_prefix}", Params.GetS3Prefix()); |
76 |
| - SubstGlobal(createSql, "{store}", storageType); |
77 |
| - SubstGlobal(createSql, "{partition_by}", partitionBy); |
78 |
| - SubstGlobal(createSql, "{string_type}", Params.GetStringType()); |
79 |
| - SubstGlobal(createSql, "{date_type}", Params.GetDateType()); |
80 |
| - SubstGlobal(createSql, "{timestamp_type}", Params.GetTimestampType()); |
81 |
| - return createSql.c_str(); |
| 116 | + return result; |
| 117 | +} |
| 118 | + |
| 119 | +NJson::TJsonValue TWorkloadGeneratorBase::GetTablesJson() const { |
| 120 | + const auto tablesYaml = GetTablesYaml(); |
| 121 | + const auto yaml = YAML::Load(tablesYaml.c_str()); |
| 122 | + return NKikimr::NYaml::Yaml2Json(yaml, true); |
82 | 123 | }
|
83 | 124 |
|
84 | 125 | TVector<std::string> TWorkloadGeneratorBase::GetCleanPaths() const {
|
|
0 commit comments