Skip to content

Commit 297d5b4

Browse files
authored
Multicolumn sparsed test (#8284)
1 parent a261c7b commit 297d5b4

File tree

7 files changed

+324
-41
lines changed

7 files changed

+324
-41
lines changed

ydb/core/formats/arrow/simple_builder/array.h

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -52,27 +52,30 @@ class TSimpleArrayConstructor: public IArrayBuilder {
5252
using TSelf = TSimpleArrayConstructor<TFiller>;
5353
using TBuilder = typename arrow::TypeTraits<typename TFiller::TValue>::BuilderType;
5454
const TFiller Filler;
55+
ui32 ShiftValue = 0;
5556

56-
TSimpleArrayConstructor(const TString& fieldName, bool nullable, const TFiller& filler)
57+
TSimpleArrayConstructor(const TString& fieldName, bool nullable, const TFiller& filler, ui32 shiftValue = 0)
5758
: TBase(fieldName, nullable)
5859
, Filler(filler)
60+
, ShiftValue(shiftValue)
5961
{
6062
}
6163
protected:
6264
virtual std::shared_ptr<arrow::Array> DoBuildArray(const ui32 recordsCount) const override {
6365
TBuilder fBuilder = TFillerBuilderConstructor<typename TFiller::TValue>::Construct();
6466
Y_ABORT_UNLESS(fBuilder.Reserve(recordsCount).ok());
6567
for (ui32 i = 0; i < recordsCount; ++i) {
66-
Y_ABORT_UNLESS(fBuilder.Append(Filler.GetValue(i)).ok());
68+
Y_ABORT_UNLESS(fBuilder.Append(Filler.GetValue(i + ShiftValue)).ok());
6769
}
6870
return *fBuilder.Finish();
6971
}
7072

71-
73+
7274
public:
73-
TSimpleArrayConstructor(const TString& fieldName, const TFiller& filler = TFiller())
75+
TSimpleArrayConstructor(const TString& fieldName, const TFiller& filler = TFiller(), ui32 shiftValue = 0)
7476
: TBase(fieldName)
7577
, Filler(filler)
78+
, ShiftValue(shiftValue)
7679
{
7780
}
7881

ydb/core/formats/arrow/simple_builder/filler.h

Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,12 @@
22
#include <contrib/libs/apache/arrow/cpp/src/arrow/type.h>
#include <contrib/libs/apache/arrow/cpp/src/arrow/type_traits.h>
#include <contrib/libs/apache/arrow/cpp/src/arrow/util/string_view.h>

#include <library/cpp/testing/unittest/registar.h>

#include <util/generic/string.h>
#include <util/random/random.h>
#include <util/system/types.h>
#include <util/system/yassert.h>

#include <limits>
#include <type_traits>
#include <vector>
711

812
namespace NKikimr::NArrow::NConstruction {
913

@@ -25,6 +29,65 @@ class TIntSeqFiller {
2529
}
2630
};
2731

32+
// Tag type derived from arrow::StringType that additionally exposes a
// `c_type` alias (TString), so it can be used where a `TArrowType::c_type`
// is required.
class TStringType: public arrow::StringType {
public:
    using c_type = TString;
};
36+
37+
template <class TArrowType>
38+
class TPoolFiller {
39+
private:
40+
using CType = typename TArrowType::c_type;
41+
42+
private:
43+
std::vector<CType> Data;
44+
45+
public:
46+
using TValue = std::conditional_t<std::is_same_v<TArrowType, TStringType>, arrow::StringType, TArrowType>;
47+
using ValueType = std::conditional_t<std::is_same_v<TArrowType, TStringType>, arrow::util::string_view, CType>;
48+
49+
static CType GetRandomNumberNotEqDef(CType defaultValue) {
50+
CType result;
51+
do {
52+
result = RandomNumber<double>() * std::numeric_limits<CType>::max();
53+
} while (result == defaultValue);
54+
return result;
55+
}
56+
57+
TPoolFiller(const ui32 poolSize, const CType defaultValue, const double defaultValueFrq) {
58+
for (ui32 i = 0; i < poolSize; ++i) {
59+
if (RandomNumber<double>() < defaultValueFrq) {
60+
Data.emplace_back(defaultValue);
61+
} else {
62+
Data.emplace_back(GetRandomNumberNotEqDef(defaultValue));
63+
}
64+
}
65+
}
66+
67+
TPoolFiller(const ui32 poolSize, const ui32 strLen, const TString& defaultValue, const double defaultValueFrq) {
68+
for (ui32 i = 0; i < poolSize; ++i) {
69+
if (RandomNumber<double>() < defaultValueFrq) {
70+
Data.emplace_back(defaultValue);
71+
} else {
72+
Data.emplace_back(NUnitTest::RandomString(strLen, i));
73+
}
74+
}
75+
}
76+
77+
template<class Type>
78+
const ValueType Convert(const Type& v) const {
79+
return v;
80+
}
81+
82+
const ValueType Convert(const TString& str) const {
83+
return arrow::util::string_view(str.data(), str.size());
84+
}
85+
86+
ValueType GetValue(const ui32 idx) const {
87+
return Convert(Data[(2 + 7 * idx) % Data.size()]);
88+
}
89+
};
90+
2891
template <class TArrowInt>
2992
class TIntConstFiller {
3093
public:

ydb/core/kqp/ut/olap/helpers/typed_local.cpp

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,31 @@ TString TTypedLocalHelper::GetTestTableSchema() const {
1818
return result;
1919
}
2020

21+
// Produces the textual schema of the multi-column test table: the fixed
// "pk_int" and "ts" columns, then `reps` groups of five columns
// (field_utf<N>, field_int<N>, field_uint<N>, field_float<N>, field_double<N>),
// then the key/engine settings.
TString TTypedLocalHelper::GetMultiColumnTestTableSchema(ui32 reps) const {
    TString schema;

    // Appends a single `Columns {...}` line for column <prefix><suffix>.
    const auto appendColumn = [&schema](const char* prefix, const TString& suffix, const char* type) {
        schema += "Columns {Name: \"";
        schema += prefix;
        schema += suffix;
        schema += "\" Type: \"";
        schema += type;
        schema += "\"}\n";
    };

    schema += R"(
Columns { Name: "pk_int" Type: "Int64" NotNull: true }
Columns { Name: "ts" Type: "Timestamp" }
)";

    for (ui32 rep = 0; rep < reps; ++rep) {
        const TString suffix = ToString(rep);
        appendColumn("field_utf", suffix, "Utf8");
        appendColumn("field_int", suffix, "Int64");
        appendColumn("field_uint", suffix, "Uint8");
        appendColumn("field_float", suffix, "Float");
        appendColumn("field_double", suffix, "Double");
    }

    schema += R"(
KeyColumnNames: "pk_int"
Engine: COLUMN_ENGINE_REPLACING_TIMESERIES
)";
    return schema;
}
41+
42+
// Creates the multi-column test table (schema from
// GetMultiColumnTestTableSchema(reps)) inside this helper's store.
// The store is taken from the StoreName member, the same one
// CreateTestOlapTable passes, instead of a hard-coded "olapStore" literal,
// so a helper configured with another store name creates the table there.
void TTypedLocalHelper::CreateMultiColumnOlapTableWithStore(ui32 reps, ui32 storeShardsCount, ui32 tableShardsCount) {
    CreateSchemaOlapTableWithStore(GetMultiColumnTestTableSchema(reps), TableName, StoreName, storeShardsCount, tableShardsCount);
}
45+
2146
void TTypedLocalHelper::ExecuteSchemeQuery(const TString& alterQuery, const NYdb::EStatus expectedStatus /*= EStatus::SUCCESS*/) const {
2247
auto session = KikimrRunner.GetTableClient().CreateSession().GetValueSync().GetSession();
2348
auto alterResult = session.ExecuteSchemeQuery(alterQuery).GetValueSync();

ydb/core/kqp/ut/olap/helpers/typed_local.h

Lines changed: 30 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,8 @@
77

88
#include <ydb/public/sdk/cpp/client/ydb_types/status_codes.h>
99

10+
#include <contrib/libs/apache/arrow/cpp/src/arrow/array/builder_binary.h>
11+
1012
#include <library/cpp/json/writer/json_value.h>
1113

1214
namespace NKikimr::NKqp {
@@ -83,11 +85,38 @@ class TTypedLocalHelper: public Tests::NCS::THelper {
8385
TBase::SendDataViaActorSystem(TablePath, batch);
8486
}
8587

88+
// Builds one record batch of `numRows` rows and sends it to TablePath.
// The batch has the "pk_int" column plus `repCount` groups of five pooled
// columns (utf/int/uint/float/double). Group number N uses N as the shift
// value for its columns, so groups read the shared pools at different offsets.
// `numRows * pkKff` is forwarded to BuildNotNullable for the pk column
// (presumably the key offset of this batch; confirm against TIntSeqFiller).
void FillMultiColumnTable(ui32 repCount, const double pkKff = 0, const ui32 numRows = 800000) const {
    namespace NCtor = NArrow::NConstruction;

    using TInt64Pool = NCtor::TPoolFiller<arrow::Int64Type>;
    using TUint8Pool = NCtor::TPoolFiller<arrow::UInt8Type>;
    using TFloatPool = NCtor::TPoolFiller<arrow::FloatType>;
    using TDoublePool = NCtor::TPoolFiller<arrow::DoubleType>;
    using TUtfPool = NCtor::TPoolFiller<NCtor::TStringType>;

    // Share of pool slots that receive the default value.
    const double defaultShare = 0.9;

    // Pools are created in this fixed order; each one consumes random draws.
    TInt64Pool int64Pool(1000, 0, defaultShare);
    TUint8Pool uint8Pool(1000, 0, defaultShare);
    TFloatPool floatPool(1000, 0, defaultShare);
    TDoublePool doublePool(1000, 0, defaultShare);
    TUtfPool utfPool(1000, 52, "abcde", defaultShare);

    std::vector<NCtor::IArrayBuilder::TPtr> columns;
    columns.emplace_back(
        NCtor::TSimpleArrayConstructor<NCtor::TIntSeqFiller<arrow::Int64Type>>::BuildNotNullable("pk_int", numRows * pkKff));

    for (ui32 rep = 0; rep < repCount; ++rep) {
        const TString suffix = ToString(rep);
        columns.emplace_back(std::make_shared<NCtor::TSimpleArrayConstructor<TUtfPool>>("field_utf" + suffix, utfPool, rep));
        columns.emplace_back(std::make_shared<NCtor::TSimpleArrayConstructor<TInt64Pool>>("field_int" + suffix, int64Pool, rep));
        columns.emplace_back(std::make_shared<NCtor::TSimpleArrayConstructor<TUint8Pool>>("field_uint" + suffix, uint8Pool, rep));
        columns.emplace_back(std::make_shared<NCtor::TSimpleArrayConstructor<TFloatPool>>("field_float" + suffix, floatPool, rep));
        columns.emplace_back(std::make_shared<NCtor::TSimpleArrayConstructor<TDoublePool>>("field_double" + suffix, doublePool, rep));
    }

    NCtor::TRecordBatchConstructor batchConstructor(columns);
    std::shared_ptr<arrow::RecordBatch> batch = batchConstructor.BuildBatch(numRows);
    TBase::SendDataViaActorSystem(TablePath, batch);
}
110+
111+
86112
void FillPKOnly(const double pkKff = 0, const ui32 numRows = 800000) const;
87113

88114
// Creates the test table named TableName in the store named StoreName,
// forwarding the requested shard counts to CreateOlapTableWithStore.
void CreateTestOlapTable(ui32 storeShardsCount = 4, ui32 tableShardsCount = 3) {
    CreateOlapTableWithStore(TableName, StoreName, storeShardsCount, tableShardsCount);
}
117+
118+
TString GetMultiColumnTestTableSchema(ui32 reps) const;
119+
void CreateMultiColumnOlapTableWithStore(ui32 reps, ui32 storeShardsCount = 4, ui32 tableShardsCount = 3);
91120
};
92121

93-
}
122+
}

0 commit comments

Comments
 (0)