Skip to content

Commit 5a2bffd

Browse files
authored
refactoring tpch generator (#10414)
1 parent df7e5a0 commit 5a2bffd

File tree

6 files changed

+203
-308
lines changed

6 files changed

+203
-308
lines changed

ydb/library/workload/benchmark_base/workload.cpp

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,15 @@ const TString TWorkloadGeneratorBase::TsvFormatString = [] () {
1616
return settings.SerializeAsString();
1717
} ();
1818

19+
// Column separator used for pipe-separated ("PSV") data.
const TString TWorkloadGeneratorBase::PsvDelimiter = "|";

// Serialized Ydb::Formats::CsvSettings for pipe-separated data:
// delimiter is PsvDelimiter, the header flag is set, and quoting is disabled.
// Built once, during static initialization of this translation unit.
const TString TWorkloadGeneratorBase::PsvFormatString = [] () {
    Ydb::Formats::CsvSettings psvSettings;
    auto* quoting = psvSettings.mutable_quoting();
    quoting->set_disabled(true);
    psvSettings.set_header(true);
    psvSettings.set_delimiter(PsvDelimiter);
    return psvSettings.SerializeAsString();
} ();
27+
1928
const TString TWorkloadGeneratorBase::CsvDelimiter = ",";
2029
const TString TWorkloadGeneratorBase::CsvFormatString = [] () {
2130
Ydb::Formats::CsvSettings settings;

ydb/library/workload/benchmark_base/workload.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,8 @@ class TWorkloadGeneratorBase : public IWorkloadQueryGenerator {
3838
static const TString TsvFormatString;
3939
static const TString CsvDelimiter;
4040
static const TString CsvFormatString;
41+
static const TString PsvDelimiter;
42+
static const TString PsvFormatString;
4143

4244
protected:
4345
virtual TString DoGetDDLQueries() const = 0;

ydb/library/workload/tpc_base/tpc_base.h

Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,4 +29,77 @@ class TTpcBaseWorkloadGenerator: public TWorkloadGeneratorBase {
2929
void PatchHeader(TString& header) const;
3030
};
3131

32+
template<class T>
33+
class TCsvItemWriter {
34+
public:
35+
using TItem = T;
36+
using TWriteFunction = std::function<void(const TItem&, IOutputStream&)>;
37+
explicit TCsvItemWriter(IOutputStream& out)
38+
: Out(out)
39+
{}
40+
41+
void RegisterField(TStringBuf name, TWriteFunction writeFunc) {
42+
Fields.emplace_back(name, writeFunc);
43+
}
44+
45+
void WriteHeader() {
46+
if (HeaderWritten) {
47+
return;
48+
}
49+
for(const auto& field: Fields) {
50+
Out << field.Name;
51+
if (&field + 1 != Fields.end()) {
52+
Out << TWorkloadGeneratorBase::PsvDelimiter;
53+
}
54+
}
55+
Out << Endl;
56+
HeaderWritten = true;
57+
}
58+
59+
void Write(const TItem& item) {
60+
WriteHeader();
61+
for(const auto& field: Fields) {
62+
field.WriteFunction(item, Out);
63+
if (&field + 1 != Fields.end()) {
64+
Out << TWorkloadGeneratorBase::PsvDelimiter;
65+
}
66+
}
67+
Out << Endl;
68+
}
69+
70+
template<class TContainer>
71+
void Write(const TContainer& items) {
72+
for(const auto& item: items) {
73+
Write(item);
74+
}
75+
}
76+
77+
void Write(const TItem* items, size_t count) {
78+
for(size_t i = 0; i < count; ++i) {
79+
Write(items[i]);
80+
}
81+
}
82+
83+
private:
84+
struct TField {
85+
TField(TStringBuf name, TWriteFunction func)
86+
: Name(name)
87+
, WriteFunction(func)
88+
{}
89+
TStringBuf Name;
90+
TWriteFunction WriteFunction;
91+
};
92+
TVector<TField> Fields;
93+
IOutputStream& Out;
94+
bool HeaderWritten = false;
95+
};
96+
97+
// Registers a column named `column_name` on `writer`. The column value is
// produced by streaming `item.record_field` into the output, where `item` is
// the record being written. `writer` must name an object whose declared type
// exposes a nested `TItem` (it is used as `decltype(writer)::TItem`).
// The expansion already ends with `;`, so a `;` at the call site only adds an
// empty statement.
#define CSV_WRITER_REGISTER_FIELD(writer, column_name, record_field) \
98+
writer.RegisterField(column_name, [](const decltype(writer)::TItem& item, IOutputStream& out) { \
99+
out << item.record_field; \
100+
});
101+
102+
// Shorthand for CSV_WRITER_REGISTER_FIELD when the column and the record member
// share a name: the header name is the stringized `column_name` and the value
// is read from `item.column_name`.
#define CSV_WRITER_REGISTER_SIMPLE_FIELD(writer, column_name) \
103+
CSV_WRITER_REGISTER_FIELD(writer, #column_name, column_name);
104+
32105
} // namespace NYdbWorkload

ydb/library/workload/tpcds/data_generator.cpp

Lines changed: 1 addition & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -86,15 +86,6 @@ TStringBuilder& TTpcdsWorkloadDataInitializerGenerator::TBulkDataGenerator::TCon
8686
return Csv;
8787
}
8888

89-
namespace {
90-
const TString FormatString = [] () {
91-
Ydb::Formats::CsvSettings settings;
92-
settings.set_delimiter("|");
93-
settings.set_header(true);
94-
settings.mutable_quoting()->set_disabled(true);
95-
return settings.SerializeAsString();
96-
} ();
97-
}
9889
void TTpcdsWorkloadDataInitializerGenerator::TBulkDataGenerator::TContext::AppendPortions(TDataPortions& result) {
9990
const auto name = getTdefsByNumber(TableNum)->name;
10091
const auto path = Owner.GetFullTableName(name);
@@ -113,7 +104,7 @@ void TTpcdsWorkloadDataInitializerGenerator::TBulkDataGenerator::TContext::Appen
113104
Owner.Owner.StateProcessor.Get(),
114105
path,
115106
name,
116-
TDataPortion::TCsv(std::move(Csv), FormatString),
107+
TDataPortion::TCsv(std::move(Csv), TWorkloadGeneratorBase::PsvFormatString),
117108
Start - 1,
118109
Count
119110
));

ydb/library/workload/tpcds/data_generator.h

Lines changed: 0 additions & 52 deletions
Original file line numberDiff line numberDiff line change
@@ -68,58 +68,6 @@ class TTpcdsWorkloadDataInitializerGenerator: public TWorkloadDataInitializerBas
6868
};
6969
};
7070

71-
template<class T>
72-
class TCsvItemWriter {
73-
public:
74-
using TItem = T;
75-
using TWriteFunction = std::function<void(const TItem&, IOutputStream&)>;
76-
explicit TCsvItemWriter(IOutputStream& out)
77-
: Out(out)
78-
{}
79-
80-
void RegisterField(TStringBuf name, TWriteFunction writeFunc) {
81-
Fields.emplace_back(name, writeFunc);
82-
}
83-
void Write(const TVector<TItem>& items) {
84-
for(const auto& field: Fields) {
85-
Out << field.Name;
86-
if (&field + 1 != Fields.end()) {
87-
Out << '|';
88-
}
89-
}
90-
Out << Endl;
91-
for(const auto& item: items) {
92-
for(const auto& field: Fields) {
93-
field.WriteFunction(item, Out);
94-
if (&field + 1 != Fields.end()) {
95-
Out << '|';
96-
}
97-
}
98-
Out << Endl;
99-
}
100-
}
101-
102-
private:
103-
struct TField {
104-
TField(TStringBuf name, TWriteFunction func)
105-
: Name(name)
106-
, WriteFunction(func)
107-
{}
108-
TStringBuf Name;
109-
TWriteFunction WriteFunction;
110-
};
111-
TVector<TField> Fields;
112-
IOutputStream& Out;
113-
};
114-
115-
#define CSV_WRITER_REGISTER_FIELD(writer, column_name, record_field) \
116-
writer.RegisterField(column_name, [](const decltype(writer)::TItem& item, IOutputStream& out) { \
117-
out << item.record_field; \
118-
});
119-
120-
#define CSV_WRITER_REGISTER_SIMPLE_FIELD(writer, column_name) \
121-
CSV_WRITER_REGISTER_FIELD(writer, #column_name, column_name);
122-
12371
#define CSV_WRITER_REGISTER_FIELD_KEY(writer, column_name, record_field) \
12472
writer.RegisterField(column_name, [](const decltype(writer)::TItem& item, IOutputStream& out) { \
12573
if (item.record_field != -1) { \

0 commit comments

Comments
 (0)