Skip to content

Commit 4338c71

Browse files
authored
Add clickbench performance tests on PostgreSQL syntax over YDB (#10861)
1 parent 73e98dd commit 4338c71

File tree

9 files changed

+130
-8
lines changed

9 files changed

+130
-8
lines changed

ydb/library/workload/benchmark_base/workload.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,10 @@ class TWorkloadBaseParams: public TWorkloadParams {
1717
Column /* "column" */,
1818
ExternalS3 /* "external-s3" */
1919
};
20+
// Query dialect a benchmark workload is generated in: native YQL or PostgreSQL-compatible syntax.
// NOTE(review): the quoted strings in the trailing comments are presumably the textual names
// emitted by the enum-serialization generator for this header — confirm before editing them.
enum class EQuerySyntax {
21+
YQL /* "yql" */,
22+
PG /* "pg"*/
23+
};
2024
void ConfigureOpts(NLastGetopt::TOpts& opts, const ECommandType commandType, int workloadType) override;
2125
TString GetFullTableName(const char* table) const;
2226
YDB_ACCESSOR_DEF(TString, Path);

ydb/library/workload/benchmark_base/ya.make

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ PEERDIR(
1313
ydb/public/api/protos
1414
)
1515

16-
GENERATE_ENUM_SERIALIZATION(workload.h)
16+
GENERATE_ENUM_SERIALIZATION_WITH_HEADER(workload.h)
1717

1818
END()
1919

ydb/library/workload/clickbench/click_bench_queries_pg.sql

Lines changed: 86 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,86 @@
1+
-- q00
2+
SELECT COUNT(*) FROM {table};
3+
-- q01
4+
SELECT COUNT(*) FROM {table} WHERE AdvEngineID <> 0;
5+
-- q02
6+
SELECT SUM(AdvEngineID), COUNT(*), AVG(ResolutionWidth) FROM {table};
7+
-- q03
8+
SELECT AVG(UserID) FROM {table};
9+
-- q04
10+
SELECT COUNT(DISTINCT UserID) FROM {table};
11+
-- q05
12+
SELECT COUNT(DISTINCT SearchPhrase) FROM {table};
13+
-- q06
14+
SELECT MIN(EventDate), MAX(EventDate) FROM {table};
15+
-- q07
16+
SELECT AdvEngineID, COUNT(*) FROM {table} WHERE AdvEngineID <> 0 GROUP BY AdvEngineID ORDER BY COUNT(*) DESC;
17+
-- q08
18+
SELECT RegionID, COUNT(DISTINCT UserID) AS u FROM {table} GROUP BY RegionID ORDER BY u DESC LIMIT 10;
19+
-- q09
20+
SELECT RegionID, SUM(AdvEngineID), COUNT(*) AS c, AVG(ResolutionWidth), COUNT(DISTINCT UserID) FROM {table} GROUP BY RegionID ORDER BY c DESC LIMIT 10;
21+
-- q10
22+
SELECT MobilePhoneModel, COUNT(DISTINCT UserID) AS u FROM {table} WHERE MobilePhoneModel <> '' GROUP BY MobilePhoneModel ORDER BY u DESC LIMIT 10;
23+
-- q11
24+
SELECT MobilePhone, MobilePhoneModel, COUNT(DISTINCT UserID) AS u FROM {table} WHERE MobilePhoneModel <> '' GROUP BY MobilePhone, MobilePhoneModel ORDER BY u DESC LIMIT 10;
25+
-- q12
26+
SELECT SearchPhrase, COUNT(*) AS c FROM {table} WHERE SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10;
27+
-- q13
28+
SELECT SearchPhrase, COUNT(DISTINCT UserID) AS u FROM {table} WHERE SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY u DESC LIMIT 10;
29+
-- q14
30+
SELECT SearchEngineID, SearchPhrase, COUNT(*) AS c FROM {table} WHERE SearchPhrase <> '' GROUP BY SearchEngineID, SearchPhrase ORDER BY c DESC LIMIT 10;
31+
-- q15
32+
SELECT UserID, COUNT(*) FROM {table} GROUP BY UserID ORDER BY COUNT(*) DESC LIMIT 10;
33+
-- q16
34+
SELECT UserID, SearchPhrase, COUNT(*) FROM {table} GROUP BY UserID, SearchPhrase ORDER BY COUNT(*) DESC LIMIT 10;
35+
-- q17
36+
SELECT UserID, SearchPhrase, COUNT(*) FROM {table} GROUP BY UserID, SearchPhrase LIMIT 10;
37+
-- q18
38+
SELECT UserID, extract(minute FROM EventTime) AS m, SearchPhrase, COUNT(*) FROM {table} GROUP BY UserID, m, SearchPhrase ORDER BY COUNT(*) DESC LIMIT 10;
39+
-- q19
40+
SELECT UserID FROM {table} WHERE UserID = 435090932899640449;
41+
-- q20
42+
SELECT COUNT(*) FROM {table} WHERE URL LIKE '%google%';
43+
-- q21
44+
SELECT SearchPhrase, MIN(URL), COUNT(*) AS c FROM {table} WHERE URL LIKE '%google%' AND SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10;
45+
-- q22
46+
SELECT SearchPhrase, MIN(URL), MIN(Title), COUNT(*) AS c, COUNT(DISTINCT UserID) FROM {table} WHERE Title LIKE '%Google%' AND URL NOT LIKE '%.google.%' AND SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10;
47+
-- q23
48+
SELECT * FROM {table} WHERE URL LIKE '%google%' ORDER BY EventTime LIMIT 10;
49+
-- q24
50+
SELECT SearchPhrase FROM {table} WHERE SearchPhrase <> '' ORDER BY EventTime LIMIT 10;
51+
-- q25
52+
SELECT SearchPhrase FROM {table} WHERE SearchPhrase <> '' ORDER BY SearchPhrase LIMIT 10;
53+
-- q26
54+
SELECT SearchPhrase FROM {table} WHERE SearchPhrase <> '' ORDER BY EventTime, SearchPhrase LIMIT 10;
55+
-- q27
56+
SELECT CounterID, AVG(length(URL)) AS l, COUNT(*) AS c FROM {table} WHERE URL <> '' GROUP BY CounterID HAVING COUNT(*) > 100000 ORDER BY l DESC LIMIT 25;
57+
-- q28
58+
SELECT REGEXP_REPLACE(Referer, '^https?://(?:www\.)?([^/]+)/.*$', '\1') AS k, AVG(length(Referer)) AS l, COUNT(*) AS c, MIN(Referer) FROM {table} WHERE Referer <> '' GROUP BY k HAVING COUNT(*) > 100000 ORDER BY l DESC LIMIT 25;
59+
-- q29
60+
SELECT SUM(ResolutionWidth), SUM(ResolutionWidth + 1), SUM(ResolutionWidth + 2), SUM(ResolutionWidth + 3), SUM(ResolutionWidth + 4), SUM(ResolutionWidth + 5), SUM(ResolutionWidth + 6), SUM(ResolutionWidth + 7), SUM(ResolutionWidth + 8), SUM(ResolutionWidth + 9), SUM(ResolutionWidth + 10), SUM(ResolutionWidth + 11), SUM(ResolutionWidth + 12), SUM(ResolutionWidth + 13), SUM(ResolutionWidth + 14), SUM(ResolutionWidth + 15), SUM(ResolutionWidth + 16), SUM(ResolutionWidth + 17), SUM(ResolutionWidth + 18), SUM(ResolutionWidth + 19), SUM(ResolutionWidth + 20), SUM(ResolutionWidth + 21), SUM(ResolutionWidth + 22), SUM(ResolutionWidth + 23), SUM(ResolutionWidth + 24), SUM(ResolutionWidth + 25), SUM(ResolutionWidth + 26), SUM(ResolutionWidth + 27), SUM(ResolutionWidth + 28), SUM(ResolutionWidth + 29), SUM(ResolutionWidth + 30), SUM(ResolutionWidth + 31), SUM(ResolutionWidth + 32), SUM(ResolutionWidth + 33), SUM(ResolutionWidth + 34), SUM(ResolutionWidth + 35), SUM(ResolutionWidth + 36), SUM(ResolutionWidth + 37), SUM(ResolutionWidth + 38), SUM(ResolutionWidth + 39), SUM(ResolutionWidth + 40), SUM(ResolutionWidth + 41), SUM(ResolutionWidth + 42), SUM(ResolutionWidth + 43), SUM(ResolutionWidth + 44), SUM(ResolutionWidth + 45), SUM(ResolutionWidth + 46), SUM(ResolutionWidth + 47), SUM(ResolutionWidth + 48), SUM(ResolutionWidth + 49), SUM(ResolutionWidth + 50), SUM(ResolutionWidth + 51), SUM(ResolutionWidth + 52), SUM(ResolutionWidth + 53), SUM(ResolutionWidth + 54), SUM(ResolutionWidth + 55), SUM(ResolutionWidth + 56), SUM(ResolutionWidth + 57), SUM(ResolutionWidth + 58), SUM(ResolutionWidth + 59), SUM(ResolutionWidth + 60), SUM(ResolutionWidth + 61), SUM(ResolutionWidth + 62), SUM(ResolutionWidth + 63), SUM(ResolutionWidth + 64), SUM(ResolutionWidth + 65), SUM(ResolutionWidth + 66), SUM(ResolutionWidth + 67), SUM(ResolutionWidth + 68), SUM(ResolutionWidth + 69), SUM(ResolutionWidth + 70), SUM(ResolutionWidth + 71), SUM(ResolutionWidth + 72), SUM(ResolutionWidth + 73), 
SUM(ResolutionWidth + 74), SUM(ResolutionWidth + 75), SUM(ResolutionWidth + 76), SUM(ResolutionWidth + 77), SUM(ResolutionWidth + 78), SUM(ResolutionWidth + 79), SUM(ResolutionWidth + 80), SUM(ResolutionWidth + 81), SUM(ResolutionWidth + 82), SUM(ResolutionWidth + 83), SUM(ResolutionWidth + 84), SUM(ResolutionWidth + 85), SUM(ResolutionWidth + 86), SUM(ResolutionWidth + 87), SUM(ResolutionWidth + 88), SUM(ResolutionWidth + 89) FROM {table};
61+
-- q30
62+
SELECT SearchEngineID, ClientIP, COUNT(*) AS c, SUM(IsRefresh), AVG(ResolutionWidth) FROM {table} WHERE SearchPhrase <> '' GROUP BY SearchEngineID, ClientIP ORDER BY c DESC LIMIT 10;
63+
-- q31
64+
SELECT WatchID, ClientIP, COUNT(*) AS c, SUM(IsRefresh), AVG(ResolutionWidth) FROM {table} WHERE SearchPhrase <> '' GROUP BY WatchID, ClientIP ORDER BY c DESC LIMIT 10;
65+
-- q32
66+
SELECT WatchID, ClientIP, COUNT(*) AS c, SUM(IsRefresh), AVG(ResolutionWidth) FROM {table} GROUP BY WatchID, ClientIP ORDER BY c DESC LIMIT 10;
67+
-- q33
68+
SELECT URL, COUNT(*) AS c FROM {table} GROUP BY URL ORDER BY c DESC LIMIT 10;
69+
-- q34
70+
SELECT 1, URL, COUNT(*) AS c FROM {table} GROUP BY 1, URL ORDER BY c DESC LIMIT 10;
71+
-- q35
72+
SELECT ClientIP, ClientIP - 1, ClientIP - 2, ClientIP - 3, COUNT(*) AS c FROM {table} GROUP BY ClientIP, ClientIP - 1, ClientIP - 2, ClientIP - 3 ORDER BY c DESC LIMIT 10;
73+
-- q36
74+
SELECT URL, COUNT(*) AS PageViews FROM {table} WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND DontCountHits = 0 AND IsRefresh = 0 AND URL <> '' GROUP BY URL ORDER BY PageViews DESC LIMIT 10;
75+
-- q37
76+
SELECT Title, COUNT(*) AS PageViews FROM {table} WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND DontCountHits = 0 AND IsRefresh = 0 AND Title <> '' GROUP BY Title ORDER BY PageViews DESC LIMIT 10;
77+
-- q38
78+
SELECT URL, COUNT(*) AS PageViews FROM {table} WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND IsRefresh = 0 AND IsLink <> 0 AND IsDownload = 0 GROUP BY URL ORDER BY PageViews DESC LIMIT 10 OFFSET 1000;
79+
-- q39
80+
SELECT TraficSourceID, SearchEngineID, AdvEngineID, CASE WHEN (SearchEngineID = 0 AND AdvEngineID = 0) THEN Referer ELSE '' END AS Src, URL AS Dst, COUNT(*) AS PageViews FROM {table} WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND IsRefresh = 0 GROUP BY TraficSourceID, SearchEngineID, AdvEngineID, Src, Dst ORDER BY PageViews DESC LIMIT 10 OFFSET 1000;
81+
-- q40
82+
SELECT URLHash, EventDate, COUNT(*) AS PageViews FROM {table} WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND IsRefresh = 0 AND TraficSourceID IN (-1, 6) AND RefererHash = 3594120000172545465 GROUP BY URLHash, EventDate ORDER BY PageViews DESC LIMIT 10 OFFSET 100;
83+
-- q41
84+
SELECT WindowClientWidth, WindowClientHeight, COUNT(*) AS PageViews FROM {table} WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND IsRefresh = 0 AND DontCountHits = 0 AND URLHash = 2868770270353813622 GROUP BY WindowClientWidth, WindowClientHeight ORDER BY PageViews DESC LIMIT 10 OFFSET 10000;
85+
-- q42
86+
SELECT DATE_TRUNC('minute', EventTime) AS M, COUNT(*) AS PageViews FROM {table} WHERE CounterID = 62 AND EventDate >= '2013-07-14' AND EventDate <= '2013-07-15' AND IsRefresh = 0 AND DontCountHits = 0 GROUP BY DATE_TRUNC('minute', EventTime) ORDER BY DATE_TRUNC('minute', EventTime) LIMIT 10 OFFSET 1000;

ydb/library/workload/clickbench/clickbench.cpp

Lines changed: 23 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
#include "clickbench.h"
22
#include "data_generator.h"
3+
#include <ydb/library/workload/benchmark_base/workload.h_serialized.h>
34

45
#include <library/cpp/resource/resource.h>
56
#include <library/cpp/string_utils/csv/csv.h>
@@ -82,7 +83,12 @@ TQueryInfoList TClickbenchWorkloadGenerator::GetWorkload(int type) {
8283
queries.emplace_back(fInput.ReadAll());
8384
}
8485
} else {
85-
const auto resourceName = Params.IsCheckCanonical() ? "queries-deterministic.sql" : "click_bench_queries.sql";
86+
TString resourceName = "click_bench_queries.sql";
87+
if (Params.GetSyntax() == TWorkloadBaseParams::EQuerySyntax::PG) {
88+
resourceName = "click_bench_queries_pg.sql";
89+
} else if (Params.IsCheckCanonical()) {
90+
resourceName = "queries-deterministic.sql";
91+
}
8692
queries = StringSplitter(NResource::Find(resourceName)).Split(';').ToList<TString>();
8793
}
8894
auto strVariables = StringSplitter(Params.GetExternalVariablesString()).Split(';').SkipEmpty().ToList<TString>();
@@ -92,14 +98,26 @@ TQueryInfoList TClickbenchWorkloadGenerator::GetWorkload(int type) {
9298
Y_ABORT_UNLESS(v.DeserializeFromString(i));
9399
vars.emplace_back(v);
94100
}
95-
vars.emplace_back("table", "`" + Params.GetPath() + "`");
101+
TString quote;
102+
switch (Params.GetSyntax()) {
103+
case TWorkloadBaseParams::EQuerySyntax::YQL:
104+
quote = "`";
105+
break;
106+
case TWorkloadBaseParams::EQuerySyntax::PG:
107+
quote = "\"";
108+
break;
109+
};
110+
vars.emplace_back("table", quote + Params.GetPath() + quote);
96111
ui32 resultsUsage = 0;
97112
for (ui32 i = 0; i < queries.size(); ++i) {
98113
auto& query = queries[i];
114+
if (Params.GetSyntax() == TWorkloadBaseParams::EQuerySyntax::PG) {
115+
query = "--!syntax_pg\n" + query;
116+
}
99117
for (auto&& v : vars) {
100118
SubstGlobal(query, "{" + v.GetId() + "}", v.GetValue());
101119
}
102-
SubstGlobal(query, "$data", "`" + Params.GetPath() + "`");
120+
SubstGlobal(query, "$data", quote + Params.GetPath() + quote);
103121
result.emplace_back();
104122
result.back().Query = query;
105123
if (const auto* res = MapFindPtr(qResults, i)) {
@@ -164,6 +182,8 @@ void TClickbenchWorkloadParams::ConfigureOpts(NLastGetopt::TOpts& opts, const EC
164182
.StoreResult(&ExternalQueries);
165183
opts.AddLongOption('c', "check-canonical", "Use deterministic queries and check results with canonical ones.")
166184
.NoArgument().StoreTrue(&CheckCanonicalFlag);
185+
opts.AddLongOption( "syntax", "Query syntax [" + GetEnumAllNames<EQuerySyntax>() + "].")
186+
.StoreResult(&Syntax).DefaultValue(Syntax);
167187
break;
168188
default:
169189
break;

ydb/library/workload/clickbench/clickbench.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ class TClickbenchWorkloadParams final: public TWorkloadBaseParams {
1919
YDB_READONLY_DEF(TFsPath, ExternalQueriesDir);
2020
YDB_READONLY_DEF(TFsPath, DataFiles);
2121
YDB_READONLY_FLAG(CheckCanonical, false);
22+
YDB_READONLY(EQuerySyntax, Syntax, EQuerySyntax::YQL);
2223
};
2324

2425
class TClickbenchWorkloadGenerator final: public TWorkloadGeneratorBase {

ydb/library/workload/clickbench/ya.make

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ SRCS(
88

99
RESOURCE(
1010
click_bench_queries.sql click_bench_queries.sql
11+
click_bench_queries_pg.sql click_bench_queries_pg.sql
1112
${ARCADIA_ROOT}/ydb/tests/functional/clickbench/data/queries-deterministic.sql queries-deterministic.sql
1213
click_bench_schema.sql click_bench_schema.sql
1314
click_bench_canonical/q0.result click_bench_canonical/q0.result

ydb/tests/olap/lib/ydb_cli.py

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,8 @@ def __init__(self,
6060
query_num: int,
6161
iterations: int,
6262
timeout: float,
63-
check_canonical: bool):
63+
check_canonical: bool,
64+
query_syntax: str):
6465
def _get_output_path(ext: str) -> str:
6566
return yatest.common.test_output_path(f'q{query_num}.{ext}')
6667

@@ -71,6 +72,7 @@ def _get_output_path(ext: str) -> str:
7172
self.iterations = iterations
7273
self.timeout = timeout
7374
self.check_canonical = check_canonical
75+
self.query_syntax = query_syntax
7476
self._nodes_info: dict[str, dict[str, int]] = {}
7577
self._plan_path = _get_output_path('plan')
7678
self._query_output_path = _get_output_path('out')
@@ -194,6 +196,8 @@ def _get_cmd(self) -> list[str]:
194196
cmd += ['--query-settings', query_preffix]
195197
if self.check_canonical:
196198
cmd.append('--check-canonical')
199+
if self.query_syntax:
200+
cmd += ['--syntax', self.query_syntax]
197201
return cmd
198202

199203
def _exec_cli(self) -> None:
@@ -222,5 +226,5 @@ def process(self) -> YdbCliHelper.WorkloadRunResult:
222226

223227
@staticmethod
224228
def workload_run(workload_type: WorkloadType, path: str, query_num: int, iterations: int = 5,
225-
timeout: float = 100., check_canonical: bool = False) -> YdbCliHelper.WorkloadRunResult:
226-
return YdbCliHelper.WorkloadProcessor(workload_type, path, query_num, iterations, timeout, check_canonical).process()
229+
timeout: float = 100., check_canonical: bool = False, query_syntax: str = '') -> YdbCliHelper.WorkloadRunResult:
230+
return YdbCliHelper.WorkloadProcessor(workload_type, path, query_num, iterations, timeout, check_canonical, query_syntax).process()

ydb/tests/olap/load/conftest.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ def __init__(self, iterations: Optional[int] = None, timeout: Optional[float] =
2323
timeout: float = 1800.
2424
refference: str = ''
2525
check_canonical: bool = False
26+
query_syntax: str = ''
2627
query_settings: dict[int, LoadSuiteBase.QuerySettings] = {}
2728

2829
@property
@@ -195,7 +196,8 @@ def run_workload_test(self, path: str, query_num: int) -> None:
195196
iterations=self._get_iterations(query_num),
196197
workload_type=self.workload_type,
197198
timeout=self._get_timeout(query_num),
198-
check_canonical=self.check_canonical
199+
check_canonical=self.check_canonical,
200+
query_syntax=self.query_syntax
199201
)
200202
allure_test_description(self.suite, self._test_name(query_num), refference_set=self.refference, start_time=start_time, end_time=time())
201203
self.process_query_result(result, query_num, self._get_iterations(query_num), True)

ydb/tests/olap/load/test_clickbench.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,3 +41,7 @@ def do_setup_class(self):
4141
@pytest.mark.parametrize('query_num', [i for i in range(0, 43)])
4242
def test_clickbench(self, query_num):
4343
self.run_workload_test(self.path, query_num)
44+
45+
46+
# Variant of TestClickbench that only overrides `query_syntax`, so the inherited
# tests run with the 'pg' query syntax instead of the default (empty) value.
class TestClickbenchPg(TestClickbench):
47+
query_syntax = 'pg'

0 commit comments

Comments
 (0)