From 5562a8bddff1f02fffa6f9431818b43ff3be7362 Mon Sep 17 00:00:00 2001 From: Matveev Sergei Date: Sun, 6 Jul 2025 23:20:41 +0000 Subject: [PATCH 1/5] test commit --- ydb/core/formats/arrow/arrow_helpers.cpp | 4 +- ydb/core/formats/arrow/converter.cpp | 52 ++++++++++++++++++- .../peephole/kqp_opt_peephole_wide_read.cpp | 20 ++++--- ydb/core/kqp/ut/common/columnshard.cpp | 3 +- ydb/core/kqp/ut/common/columnshard.h | 2 +- ydb/core/kqp/ut/olap/decimal_ut.cpp | 20 +++---- .../test_helper/columnshard_ut_common.h | 6 +-- 7 files changed, 78 insertions(+), 29 deletions(-) diff --git a/ydb/core/formats/arrow/arrow_helpers.cpp b/ydb/core/formats/arrow/arrow_helpers.cpp index 0cc76775251b..de760d4db67a 100644 --- a/ydb/core/formats/arrow/arrow_helpers.cpp +++ b/ydb/core/formats/arrow/arrow_helpers.cpp @@ -37,8 +37,8 @@ std::shared_ptr CreateEmptyArrowImpl(const NScheme::TTypeInfo& } template <> -std::shared_ptr CreateEmptyArrowImpl(const NScheme::TTypeInfo& typeInfo) { - return arrow::decimal(typeInfo.GetDecimalType().GetPrecision(), typeInfo.GetDecimalType().GetScale()); +std::shared_ptr CreateEmptyArrowImpl(const NScheme::TTypeInfo&) { + return arrow::utf8(); } template <> diff --git a/ydb/core/formats/arrow/converter.cpp b/ydb/core/formats/arrow/converter.cpp index 548c05b805da..1686d27e5e9c 100644 --- a/ydb/core/formats/arrow/converter.cpp +++ b/ydb/core/formats/arrow/converter.cpp @@ -53,8 +53,56 @@ static bool ConvertData(TCell& cell, const NScheme::TTypeInfo& colType, TMemoryP static arrow::Status ConvertColumn( const NScheme::TTypeInfo colType, std::shared_ptr& column, std::shared_ptr& field, const bool allowInfDouble) { switch (colType.GetTypeId()) { - case NScheme::NTypeIds::Decimal: + case NScheme::NTypeIds::Decimal: { + const auto id = column->type()->id(); + if (id != arrow::Type::STRING && id != arrow::Type::BINARY) { + return arrow::Status::TypeError("Decimal column must be STRING/BINARY, got ", column->type()->ToString()); + } + + auto strArr = std::static_pointer_cast(column); + + arrow::FixedSizeBinaryBuilder builder(arrow::fixed_size_binary(16), + arrow::default_memory_pool()); + ARROW_RETURN_NOT_OK(builder.Reserve(strArr->length())); + + const ui8 precision = colType.GetDecimalType().GetPrecision(); + const ui8 scale = colType.GetDecimalType().GetScale(); + + auto Int128ToBigEndian = [](NYql::NDecimal::TInt128 v, ui8 out[16]) { + for (int i = 15; i >= 0; --i) { + out[i] = static_cast(v & 0xff); + v >>= 8; + } + }; + + for (int64_t i = 0; i < strArr->length(); ++i) { + if (strArr->IsNull(i)) { + ARROW_RETURN_NOT_OK(builder.AppendNull()); + continue; + } + + arrow::util::string_view sv = strArr->GetView(i); + TStringBuf sbuf(sv.data(), sv.size()); + + NYql::NDecimal::TInt128 dec; + try { + dec = NYql::NDecimal::FromString(sbuf, precision, scale); + } catch (const std::exception& e) { + return arrow::Status::Invalid("Bad decimal literal: ", sbuf); + } + + ui8 buf[16]; + Int128ToBigEndian(dec, buf); + ARROW_RETURN_NOT_OK(builder.Append(buf)); + } + + std::shared_ptr out; + ARROW_RETURN_NOT_OK(builder.Finish(&out)); + + column = std::move(out); + field = arrow::field(field->name(), arrow::fixed_size_binary(16)); return arrow::Status::OK(); + } case NScheme::NTypeIds::JsonDocument: { const static TSet jsonDocArrowTypes{ arrow::Type::BINARY, arrow::Type::STRING }; if (!jsonDocArrowTypes.contains(column->type()->id())) { @@ -266,6 +314,8 @@ bool TArrowToYdbConverter::NeedInplaceConversion(const NScheme::TTypeInfo& typeI bool TArrowToYdbConverter::NeedConversion(const NScheme::TTypeInfo& typeInRequest, const NScheme::TTypeInfo& expectedType) { switch (expectedType.GetTypeId()) { + case NScheme::NTypeIds::Decimal: + return typeInRequest.GetTypeId() == NScheme::NTypeIds::Utf8; case NScheme::NTypeIds::JsonDocument: return typeInRequest.GetTypeId() == NScheme::NTypeIds::Utf8; default: diff --git a/ydb/core/kqp/opt/peephole/kqp_opt_peephole_wide_read.cpp b/ydb/core/kqp/opt/peephole/kqp_opt_peephole_wide_read.cpp index 16f2f4e14393..47932188168f 100644 --- a/ydb/core/kqp/opt/peephole/kqp_opt_peephole_wide_read.cpp +++ b/ydb/core/kqp/opt/peephole/kqp_opt_peephole_wide_read.cpp @@ -17,6 +17,8 @@ TExprBase KqpBuildWideReadTable(const TExprBase& node, TExprContext& ctx, TTypeA return node; } + Y_UNUSED(typesCtx); + auto rowType = node.Ref().GetTypeAnn()->Cast()->GetItemType()->Cast(); TVector args; @@ -41,18 +43,14 @@ TExprBase KqpBuildWideReadTable(const TExprBase& node, TExprContext& ctx, TTypeA } else if (auto maybeRead = node.Maybe()) { wideRead = ctx.RenameNode(*node.Raw(), TKqpWideReadTableRanges::CallableName()); } else if (auto maybeRead = node.Maybe()) { - if (typesCtx.IsBlockEngineEnabled()) { - wideRead = Build(ctx, node.Pos()) - .Input() - .Input() - .Input(ctx.RenameNode(*node.Raw(), TKqpBlockReadOlapTableRanges::CallableName())) - .Build() + wideRead = Build(ctx, node.Pos()) + .Input() + .Input() + .Input(ctx.RenameNode(*node.Raw(), TKqpBlockReadOlapTableRanges::CallableName())) .Build() - .Done() - .Ptr(); - } else { - wideRead = ctx.RenameNode(*node.Raw(), TKqpWideReadOlapTableRanges::CallableName()); - } + .Build() + .Done() + .Ptr(); } else { YQL_ENSURE(false, "Unknown read table operation: " << node.Ptr()->Content()); } diff --git a/ydb/core/kqp/ut/common/columnshard.cpp b/ydb/core/kqp/ut/common/columnshard.cpp index f44ce45889b5..ac3d5d4a1e25 100644 --- a/ydb/core/kqp/ut/common/columnshard.cpp +++ b/ydb/core/kqp/ut/common/columnshard.cpp @@ -1,6 +1,7 @@ #include "columnshard.h" #include +#include #include #include #include @@ -391,7 +392,7 @@ namespace NKqp { case NScheme::NTypeIds::JsonDocument: return arrow::field(name, arrow::binary(), nullable); case NScheme::NTypeIds::Decimal: - return arrow::field(name, arrow::decimal(typeInfo.GetDecimalType().GetPrecision(), typeInfo.GetDecimalType().GetScale()), nullable); + return arrow::field(name, arrow::utf8(), nullable); case NScheme::NTypeIds::Pg: switch (NPg::PgTypeIdFromTypeDesc(typeInfo.GetPgTypeDesc())) { case INT2OID: diff --git a/ydb/core/kqp/ut/common/columnshard.h b/ydb/core/kqp/ut/common/columnshard.h index e4f1005344bb..d75e4a3ded9c 100644 --- a/ydb/core/kqp/ut/common/columnshard.h +++ b/ydb/core/kqp/ut/common/columnshard.h @@ -81,7 +81,7 @@ class TTestHelper { return *this; } - private: + private: virtual TString GetObjectType() const = 0; TString BuildColumnsStr(const TVector& clumns) const; std::shared_ptr BuildField(const TString name, const NScheme::TTypeInfo& typeInfo, bool nullable) const; diff --git a/ydb/core/kqp/ut/olap/decimal_ut.cpp b/ydb/core/kqp/ut/olap/decimal_ut.cpp index 99cb4795ffd2..c37075656f12 100644 --- a/ydb/core/kqp/ut/olap/decimal_ut.cpp +++ b/ydb/core/kqp/ut/olap/decimal_ut.cpp @@ -72,14 +72,14 @@ Y_UNIT_TEST_SUITE(KqpDecimalColumnShard) { { TTestHelper::TUpdatesBuilder inserter = Inserter(); - inserter.AddRow().Add(1).Add(4).Add(TDecimalValue("3.14", Precision, Scale)); - inserter.AddRow().Add(2).Add(3).Add(TDecimalValue("8.16", Precision, Scale)); + inserter.AddRow().Add(1).Add(4).Add("3.14"); + inserter.AddRow().Add(2).Add(3).Add("8.16"); Upsert(inserter); } { TTestHelper::TUpdatesBuilder inserter = Inserter(); - inserter.AddRow().Add(4).Add(1).Add(TDecimalValue("12.46", Precision, Scale)); - inserter.AddRow().Add(3).Add(2).Add(TDecimalValue("8.492", Precision, Scale)); + inserter.AddRow().Add(4).Add(1).Add("12.46"); + inserter.AddRow().Add(3).Add(2).Add("8.492"); Upsert(inserter); } @@ -96,10 +96,10 @@ Y_UNIT_TEST_SUITE(KqpDecimalColumnShard) { { TTestHelper::TUpdatesBuilder inserter = Inserter(); - inserter.AddRow().Add(1).Add(1).Add(TDecimalValue("12.46", Precision, Scale)); - inserter.AddRow().Add(2).Add(1).Add(TDecimalValue("8.16", Precision, Scale)); - inserter.AddRow().Add(3).Add(2).Add(TDecimalValue("12.46", Precision, Scale)); - inserter.AddRow().Add(4).Add(2).Add(TDecimalValue("8.16", Precision, Scale)); + inserter.AddRow().Add(1).Add(1).Add("12.46"); + inserter.AddRow().Add(2).Add(1).Add("8.16"); + inserter.AddRow().Add(3).Add(2).Add("12.46"); + inserter.AddRow().Add(4).Add(2).Add("8.16"); Upsert(inserter); } } @@ -220,8 +220,8 @@ Y_UNIT_TEST_SUITE(KqpDecimalColumnShard) { auto insert = [](TDecimalTestCase& tester) { TTestHelper::TUpdatesBuilder inserter = tester.Inserter(); - inserter.AddRow().Add(5).Add(12).Add(TDecimalValue("8.492", tester.GetPrecision(), tester.GetScale())); - inserter.AddRow().Add(6).Add(30).Add(TDecimalValue("12.46", tester.GetPrecision(), tester.GetScale())); + inserter.AddRow().Add(5).Add(12).Add("8.492"); + inserter.AddRow().Add(6).Add(30).Add("12.46"); tester.Upsert(inserter); }; diff --git a/ydb/core/tx/columnshard/test_helper/columnshard_ut_common.h b/ydb/core/tx/columnshard/test_helper/columnshard_ut_common.h index 38f878b400ed..ea1777222154 100644 --- a/ydb/core/tx/columnshard/test_helper/columnshard_ut_common.h +++ b/ydb/core/tx/columnshard/test_helper/columnshard_ut_common.h @@ -534,9 +534,9 @@ class TTableUpdatesBuilder { } } - if constexpr (std::is_same::value) { - if constexpr (arrow::is_decimal128_type::value) { - Y_ABORT_UNLESS(typedBuilder.Append(arrow::Decimal128(data.Hi_, data.Low_)).ok()); + if constexpr (std::is_same::value) { + if constexpr (arrow::is_string_like_type::value) { + Y_ABORT_UNLESS(typedBuilder.Append(data.ToString()).ok()); return true; } } From f4b5ee7c501ebdb2209d4dc1b3f916330a6eaa36 Mon Sep 17 00:00:00 2001 From: Matveev Sergei Date: Sun, 6 Jul 2025 23:26:41 +0000 Subject: [PATCH 2/5] test commit --- ydb/core/formats/arrow/converter.cpp | 12 ++++-------- .../columnshard/test_helper/columnshard_ut_common.h | 2 +- 2 files changed, 5 insertions(+), 9 deletions(-) diff --git a/ydb/core/formats/arrow/converter.cpp b/ydb/core/formats/arrow/converter.cpp index 1686d27e5e9c..3d995482dd27 100644 --- a/ydb/core/formats/arrow/converter.cpp +++ b/ydb/core/formats/arrow/converter.cpp @@ -56,7 +56,7 @@ static arrow::Status ConvertColumn( case NScheme::NTypeIds::Decimal: { const auto id = column->type()->id(); if (id != arrow::Type::STRING && id != arrow::Type::BINARY) { - return arrow::Status::TypeError("Decimal column must be STRING/BINARY, got ", column->type()->ToString()); + return arrow::Status::TypeError("Decimal column must be string, but got ", column->type()->ToString()); } auto strArr = std::static_pointer_cast(column); @@ -69,13 +69,13 @@ static arrow::Status ConvertColumn( const ui8 scale = colType.GetDecimalType().GetScale(); auto Int128ToBigEndian = [](NYql::NDecimal::TInt128 v, ui8 out[16]) { - for (int i = 15; i >= 0; --i) { + for (i32 i = 15; i >= 0; --i) { out[i] = static_cast(v & 0xff); v >>= 8; } }; - for (int64_t i = 0; i < strArr->length(); ++i) { + for (i64 i = 0; i < strArr->length(); ++i) { if (strArr->IsNull(i)) { ARROW_RETURN_NOT_OK(builder.AppendNull()); continue; @@ -85,11 +85,7 @@ static arrow::Status ConvertColumn( TStringBuf sbuf(sv.data(), sv.size()); NYql::NDecimal::TInt128 dec; - try { - dec = NYql::NDecimal::FromString(sbuf, precision, scale); - } catch (const std::exception& e) { - return arrow::Status::Invalid("Bad decimal literal: ", sbuf); - } + dec = NYql::NDecimal::FromString(sbuf, precision, scale); ui8 buf[16]; Int128ToBigEndian(dec, buf); diff --git a/ydb/core/tx/columnshard/test_helper/columnshard_ut_common.h b/ydb/core/tx/columnshard/test_helper/columnshard_ut_common.h index ea1777222154..7800efbc5345 100644 --- a/ydb/core/tx/columnshard/test_helper/columnshard_ut_common.h +++ b/ydb/core/tx/columnshard/test_helper/columnshard_ut_common.h @@ -534,7 +534,7 @@ class TTableUpdatesBuilder { } } - if constexpr (std::is_same::value) { + if constexpr (std::is_same::value) { if constexpr (arrow::is_string_like_type::value) { Y_ABORT_UNLESS(typedBuilder.Append(data.ToString()).ok()); return true; From 1f1e63aa36c51a3cd4fdf89acd35b53f3a177f7c Mon Sep 17 00:00:00 2001 From: Matveev Sergei Date: Mon, 7 Jul 2025 15:40:20 +0000 Subject: [PATCH 3/5] test commit --- .../formats/arrow/arrow_batch_builder.cpp | 23 ++++++--- ydb/core/formats/arrow/arrow_helpers.cpp | 5 +- ydb/core/formats/arrow/converter.cpp | 48 +------------------ ydb/core/formats/arrow/converter.h | 17 +++++++ ydb/core/formats/arrow/switch/switch_type.h | 2 +- ydb/core/grpc_services/rpc_load_rows.cpp | 27 +++++++++-- ydb/core/kqp/ut/common/columnshard.cpp | 13 ++++- ydb/core/kqp/ut/common/columnshard.h | 2 +- ydb/core/kqp/ut/olap/decimal_ut.cpp | 20 ++++---- .../tx/tx_proxy/upload_rows_common_impl.h | 4 +- .../formats/arrow/switch/switch_type.h | 2 +- 11 files changed, 89 insertions(+), 74 deletions(-) diff --git a/ydb/core/formats/arrow/arrow_batch_builder.cpp b/ydb/core/formats/arrow/arrow_batch_builder.cpp index 7db800ffd816..5d87a89651ab 100644 --- a/ydb/core/formats/arrow/arrow_batch_builder.cpp +++ b/ydb/core/formats/arrow/arrow_batch_builder.cpp @@ -35,15 +35,26 @@ arrow::Status AppendCell(arrow::StringBuilder& builder, const TCell& cell) { return builder.Append(cell.Data(), cell.Size()); } -arrow::Status AppendCell(arrow::Decimal128Builder& builder, const TCell& cell) { +// arrow::Status AppendCell(arrow::Decimal128Builder& builder, const TCell& cell) { +// if (cell.IsNull()) { +// return builder.AppendNull(); +// } + +// /// @warning There's no conversion for special YQL Decimal valies here, +// /// so we could convert them to Arrow and back but cannot calculate anything on them. +// /// We need separate Arrow.Decimal, YQL.Decimal, CH.Decimal and YDB.Decimal in future. +// return builder.Append(cell.Data()); +// } + +arrow::Status AppendCell(arrow::FixedSizeBinaryBuilder& builder, const TCell& cell) { + std::cout << "[DEBUG] AppendCell: is_null=" << cell.IsNull() << " size=" << cell.Size() << std::endl; if (cell.IsNull()) { return builder.AppendNull(); } - - /// @warning There's no conversion for special YQL Decimal valies here, - /// so we could convert them to Arrow and back but cannot calculate anything on them. - /// We need separate Arrow.Decimal, YQL.Decimal, CH.Decimal and YDB.Decimal in future. - return builder.Append(cell.Data()); + if (cell.Size() != 16) { + return arrow::Status::Invalid("Decimal cell must be 16 bytes, got ", cell.Size()); + } + return builder.Append(reinterpret_cast(cell.Data())); } template diff --git a/ydb/core/formats/arrow/arrow_helpers.cpp b/ydb/core/formats/arrow/arrow_helpers.cpp index de760d4db67a..f42e0ea873ef 100644 --- a/ydb/core/formats/arrow/arrow_helpers.cpp +++ b/ydb/core/formats/arrow/arrow_helpers.cpp @@ -37,8 +37,9 @@ std::shared_ptr CreateEmptyArrowImpl(const NScheme::TTypeInfo& } template <> -std::shared_ptr CreateEmptyArrowImpl(const NScheme::TTypeInfo&) { - return arrow::utf8(); +std::shared_ptr CreateEmptyArrowImpl(const NScheme::TTypeInfo& typeInfo) { + Y_UNUSED(typeInfo); + return std::make_shared(16); } template <> diff --git a/ydb/core/formats/arrow/converter.cpp b/ydb/core/formats/arrow/converter.cpp index 3d995482dd27..548c05b805da 100644 --- a/ydb/core/formats/arrow/converter.cpp +++ b/ydb/core/formats/arrow/converter.cpp @@ -53,52 +53,8 @@ static bool ConvertData(TCell& cell, const NScheme::TTypeInfo& colType, TMemoryP static arrow::Status ConvertColumn( const NScheme::TTypeInfo colType, std::shared_ptr& column, std::shared_ptr& field, const bool allowInfDouble) { switch (colType.GetTypeId()) { - case NScheme::NTypeIds::Decimal: { - const auto id = column->type()->id(); - if (id != arrow::Type::STRING && id != arrow::Type::BINARY) { - return arrow::Status::TypeError("Decimal column must be string, but got ", column->type()->ToString()); - } - - auto strArr = std::static_pointer_cast(column); - - arrow::FixedSizeBinaryBuilder builder(arrow::fixed_size_binary(16), - arrow::default_memory_pool()); - ARROW_RETURN_NOT_OK(builder.Reserve(strArr->length())); - - const ui8 precision = colType.GetDecimalType().GetPrecision(); - const ui8 scale = colType.GetDecimalType().GetScale(); - - auto Int128ToBigEndian = [](NYql::NDecimal::TInt128 v, ui8 out[16]) { - for (i32 i = 15; i >= 0; --i) { - out[i] = static_cast(v & 0xff); - v >>= 8; - } - }; - - for (i64 i = 0; i < strArr->length(); ++i) { - if (strArr->IsNull(i)) { - ARROW_RETURN_NOT_OK(builder.AppendNull()); - continue; - } - - arrow::util::string_view sv = strArr->GetView(i); - TStringBuf sbuf(sv.data(), sv.size()); - - NYql::NDecimal::TInt128 dec; - dec = NYql::NDecimal::FromString(sbuf, precision, scale); - - ui8 buf[16]; - Int128ToBigEndian(dec, buf); - ARROW_RETURN_NOT_OK(builder.Append(buf)); - } - - std::shared_ptr out; - ARROW_RETURN_NOT_OK(builder.Finish(&out)); - - column = std::move(out); - field = arrow::field(field->name(), arrow::fixed_size_binary(16)); + case NScheme::NTypeIds::Decimal: return arrow::Status::OK(); - } case NScheme::NTypeIds::JsonDocument: { const static TSet jsonDocArrowTypes{ arrow::Type::BINARY, arrow::Type::STRING }; if (!jsonDocArrowTypes.contains(column->type()->id())) { @@ -310,8 +266,6 @@ bool TArrowToYdbConverter::NeedInplaceConversion(const NScheme::TTypeInfo& typeI bool TArrowToYdbConverter::NeedConversion(const NScheme::TTypeInfo& typeInRequest, const NScheme::TTypeInfo& expectedType) { switch (expectedType.GetTypeId()) { - case NScheme::NTypeIds::Decimal: - return typeInRequest.GetTypeId() == NScheme::NTypeIds::Utf8; case NScheme::NTypeIds::JsonDocument: return typeInRequest.GetTypeId() == NScheme::NTypeIds::Utf8; default: diff --git a/ydb/core/formats/arrow/converter.h b/ydb/core/formats/arrow/converter.h index 8288d50cb178..e74cddb372bc 100644 --- a/ydb/core/formats/arrow/converter.h +++ b/ydb/core/formats/arrow/converter.h @@ -33,7 +33,12 @@ class TArrowToYdbConverter { template TCell MakeCellFromView(const std::shared_ptr& column, i64 row) { auto array = std::static_pointer_cast(column); + if (array->IsNull(row)) { + std::cout << "[DEBUG] MakeCellFromView: row=" << row << " is_null=1 size=0" << std::endl; + return TCell(); + } auto data = array->GetView(row); + std::cout << "[DEBUG] MakeCellFromView: row=" << row << " is_null=0 size=" << data.size() << std::endl; return TCell(data.data(), data.size()); } @@ -57,6 +62,18 @@ class TArrowToYdbConverter { return MakeCellFromView(column, row); } + template <> + TCell MakeCell(const std::shared_ptr& column, i64 row) { + auto array = std::static_pointer_cast(column); + if (array->IsNull(row)) { + std::cout << "[DEBUG] MakeCell: row=" << row << " is_null=1 size=0" << std::endl; + return TCell(); + } + auto data = array->GetView(row); + std::cout << "[DEBUG] MakeCell: row=" << row << " is_null=0 size=" << data.size() << std::endl; + return TCell(data.data(), data.size()); + } + public: static bool NeedDataConversion(const NScheme::TTypeInfo& colType); diff --git a/ydb/core/formats/arrow/switch/switch_type.h b/ydb/core/formats/arrow/switch/switch_type.h index b5183df39d9a..fd8b167ef5e4 100644 --- a/ydb/core/formats/arrow/switch/switch_type.h +++ b/ydb/core/formats/arrow/switch/switch_type.h @@ -64,7 +64,7 @@ template case NScheme::NTypeIds::Interval: return callback(TTypeWrapper()); case NScheme::NTypeIds::Decimal: - return callback(TTypeWrapper()); + return callback(TTypeWrapper()); case NScheme::NTypeIds::Datetime64: case NScheme::NTypeIds::Timestamp64: diff --git a/ydb/core/grpc_services/rpc_load_rows.cpp b/ydb/core/grpc_services/rpc_load_rows.cpp index e544a56f94db..0cba4b249157 100644 --- a/ydb/core/grpc_services/rpc_load_rows.cpp +++ b/ydb/core/grpc_services/rpc_load_rows.cpp @@ -18,6 +18,7 @@ #include #include +#include namespace NKikimr { namespace NGRpcService { @@ -28,7 +29,7 @@ using namespace Ydb; namespace { // TODO: no mapping for DATE, DATETIME, TZ_*, YSON, JSON, UUID, JSON_DOCUMENT, DYNUMBER -bool ConvertArrowToYdbPrimitive(const arrow::DataType& type, Ydb::Type& toType) { +bool ConvertArrowToYdbPrimitive(const arrow::DataType& type, Ydb::Type& toType, const arrow::Field* field = nullptr) { switch (type.id()) { case arrow::Type::BOOL: toType.set_type_id(Ydb::Type::BOOL); @@ -82,9 +83,29 @@ bool ConvertArrowToYdbPrimitive(const arrow::DataType& type, Ydb::Type& toType) decimalType->set_scale(arrowDecimal->scale()); return true; } + case arrow::Type::FIXED_SIZE_BINARY: { + auto& fsb = static_cast(type); + if (fsb.byte_width() == 16) { + Ydb::DecimalType* decimalType = toType.mutable_decimal_type(); + ui32 precision = 22, scale = 9; // значения по умолчанию + if (field && field->metadata()) { + auto precisionMeta = field->metadata()->Get("precision").ValueOr(""); + auto scaleMeta = field->metadata()->Get("scale").ValueOr(""); + if (!precisionMeta.empty()) { + precision = FromString(precisionMeta); + } + if (!scaleMeta.empty()) { + scale = FromString(scaleMeta); + } + } + decimalType->set_precision(precision); + decimalType->set_scale(scale); + return true; + } + break; + } case arrow::Type::NA: case arrow::Type::HALF_FLOAT: - case arrow::Type::FIXED_SIZE_BINARY: case arrow::Type::DATE32: case arrow::Type::DATE64: case arrow::Type::TIME32: @@ -410,7 +431,7 @@ class TUploadColumnsRPCPublic : public NTxProxy::TUploadRowsBasetype(); Ydb::Type ydbType; - if (!ConvertArrowToYdbPrimitive(*type, ydbType)) { + if (!ConvertArrowToYdbPrimitive(*type, ydbType, field.get())) { return TConclusionStatus::Fail("Cannot convert arrow type to ydb one: " + type->ToString()); } out.emplace_back(name, std::move(ydbType)); diff --git a/ydb/core/kqp/ut/common/columnshard.cpp b/ydb/core/kqp/ut/common/columnshard.cpp index ac3d5d4a1e25..212e09333f21 100644 --- a/ydb/core/kqp/ut/common/columnshard.cpp +++ b/ydb/core/kqp/ut/common/columnshard.cpp @@ -391,8 +391,17 @@ namespace NKqp { return arrow::field(name, arrow::int64(), nullable); case NScheme::NTypeIds::JsonDocument: return arrow::field(name, arrow::binary(), nullable); - case NScheme::NTypeIds::Decimal: - return arrow::field(name, arrow::utf8(), nullable); + case NScheme::NTypeIds::Decimal: { + // Добавляем precision/scale в метаданные + auto meta = arrow::KeyValueMetadata::Make( + std::vector{"precision", "scale"}, + std::vector{ + ToString(typeInfo.GetDecimalType().GetPrecision()), + ToString(typeInfo.GetDecimalType().GetScale()) + } + ); + return arrow::field(name, arrow::fixed_size_binary(16), nullable, meta); + } case NScheme::NTypeIds::Pg: switch (NPg::PgTypeIdFromTypeDesc(typeInfo.GetPgTypeDesc())) { case INT2OID: diff --git a/ydb/core/kqp/ut/common/columnshard.h b/ydb/core/kqp/ut/common/columnshard.h index d75e4a3ded9c..e4f1005344bb 100644 --- a/ydb/core/kqp/ut/common/columnshard.h +++ b/ydb/core/kqp/ut/common/columnshard.h @@ -81,7 +81,7 @@ class TTestHelper { return *this; } - private: + private: virtual TString GetObjectType() const = 0; TString BuildColumnsStr(const TVector& clumns) const; std::shared_ptr BuildField(const TString name, const NScheme::TTypeInfo& typeInfo, bool nullable) const; diff --git a/ydb/core/kqp/ut/olap/decimal_ut.cpp b/ydb/core/kqp/ut/olap/decimal_ut.cpp index c37075656f12..99cb4795ffd2 100644 --- a/ydb/core/kqp/ut/olap/decimal_ut.cpp +++ b/ydb/core/kqp/ut/olap/decimal_ut.cpp @@ -72,14 +72,14 @@ Y_UNIT_TEST_SUITE(KqpDecimalColumnShard) { { TTestHelper::TUpdatesBuilder inserter = Inserter(); - inserter.AddRow().Add(1).Add(4).Add("3.14"); - inserter.AddRow().Add(2).Add(3).Add("8.16"); + inserter.AddRow().Add(1).Add(4).Add(TDecimalValue("3.14", Precision, Scale)); + inserter.AddRow().Add(2).Add(3).Add(TDecimalValue("8.16", Precision, Scale)); Upsert(inserter); } { TTestHelper::TUpdatesBuilder inserter = Inserter(); - inserter.AddRow().Add(4).Add(1).Add("12.46"); - inserter.AddRow().Add(3).Add(2).Add("8.492"); + inserter.AddRow().Add(4).Add(1).Add(TDecimalValue("12.46", Precision, Scale)); + inserter.AddRow().Add(3).Add(2).Add(TDecimalValue("8.492", Precision, Scale)); Upsert(inserter); } @@ -96,10 +96,10 @@ Y_UNIT_TEST_SUITE(KqpDecimalColumnShard) { { TTestHelper::TUpdatesBuilder inserter = Inserter(); - inserter.AddRow().Add(1).Add(1).Add("12.46"); - inserter.AddRow().Add(2).Add(1).Add("8.16"); - inserter.AddRow().Add(3).Add(2).Add("12.46"); - inserter.AddRow().Add(4).Add(2).Add("8.16"); + inserter.AddRow().Add(1).Add(1).Add(TDecimalValue("12.46", Precision, Scale)); + inserter.AddRow().Add(2).Add(1).Add(TDecimalValue("8.16", Precision, Scale)); + inserter.AddRow().Add(3).Add(2).Add(TDecimalValue("12.46", Precision, Scale)); + inserter.AddRow().Add(4).Add(2).Add(TDecimalValue("8.16", Precision, Scale)); Upsert(inserter); } } @@ -220,8 +220,8 @@ Y_UNIT_TEST_SUITE(KqpDecimalColumnShard) { auto insert = [](TDecimalTestCase& tester) { TTestHelper::TUpdatesBuilder inserter = tester.Inserter(); - inserter.AddRow().Add(5).Add(12).Add("8.492"); - inserter.AddRow().Add(6).Add(30).Add("12.46"); + inserter.AddRow().Add(5).Add(12).Add(TDecimalValue("8.492", tester.GetPrecision(), tester.GetScale())); + inserter.AddRow().Add(6).Add(30).Add(TDecimalValue("12.46", tester.GetPrecision(), tester.GetScale())); tester.Upsert(inserter); }; diff --git a/ydb/core/tx/tx_proxy/upload_rows_common_impl.h b/ydb/core/tx/tx_proxy/upload_rows_common_impl.h index 4f419a63e434..7a8ec5d742de 100644 --- a/ydb/core/tx/tx_proxy/upload_rows_common_impl.h +++ b/ydb/core/tx/tx_proxy/upload_rows_common_impl.h @@ -519,7 +519,9 @@ class TUploadRowsBase : public TActorBootstrapped()); case arrow::Type::DECIMAL: - return f(TTypeWrapper()); + return f(TTypeWrapper()); case arrow::Type::DURATION: return f(TTypeWrapper()); case arrow::Type::LARGE_STRING: From f40f23393a87505d8d76526150c4a73bad30dfb2 Mon Sep 17 00:00:00 2001 From: Matveev Sergei Date: Mon, 7 Jul 2025 15:42:49 +0000 Subject: [PATCH 4/5] test commit --- ydb/core/grpc_services/rpc_load_rows.cpp | 2 +- ydb/core/kqp/ut/common/columnshard.cpp | 1 - .../columnshard/test_helper/columnshard_ut_common.h | 12 +++++++++--- 3 files changed, 10 insertions(+), 5 deletions(-) diff --git a/ydb/core/grpc_services/rpc_load_rows.cpp b/ydb/core/grpc_services/rpc_load_rows.cpp index 0cba4b249157..fbbc5e7348cf 100644 --- a/ydb/core/grpc_services/rpc_load_rows.cpp +++ b/ydb/core/grpc_services/rpc_load_rows.cpp @@ -87,7 +87,7 @@ bool ConvertArrowToYdbPrimitive(const arrow::DataType& type, Ydb::Type& toType, auto& fsb = static_cast(type); if (fsb.byte_width() == 16) { Ydb::DecimalType* decimalType = toType.mutable_decimal_type(); - ui32 precision = 22, scale = 9; // значения по умолчанию + ui32 precision = 22, scale = 9; if (field && field->metadata()) { auto precisionMeta = field->metadata()->Get("precision").ValueOr(""); auto scaleMeta = field->metadata()->Get("scale").ValueOr(""); diff --git a/ydb/core/kqp/ut/common/columnshard.cpp b/ydb/core/kqp/ut/common/columnshard.cpp index 212e09333f21..ab40dd9eb1bc 100644 --- a/ydb/core/kqp/ut/common/columnshard.cpp +++ b/ydb/core/kqp/ut/common/columnshard.cpp @@ -392,7 +392,6 @@ namespace NKqp { case NScheme::NTypeIds::JsonDocument: return arrow::field(name, arrow::binary(), nullable); case NScheme::NTypeIds::Decimal: { - // Добавляем precision/scale в метаданные auto meta = arrow::KeyValueMetadata::Make( std::vector{"precision", "scale"}, std::vector{ diff --git a/ydb/core/tx/columnshard/test_helper/columnshard_ut_common.h b/ydb/core/tx/columnshard/test_helper/columnshard_ut_common.h index 7800efbc5345..3d5630671932 100644 --- a/ydb/core/tx/columnshard/test_helper/columnshard_ut_common.h +++ b/ydb/core/tx/columnshard/test_helper/columnshard_ut_common.h @@ -534,9 +534,15 @@ class TTableUpdatesBuilder { } } - if constexpr (std::is_same::value) { - if constexpr (arrow::is_string_like_type::value) { - Y_ABORT_UNLESS(typedBuilder.Append(data.ToString()).ok()); + if constexpr (std::is_same::value) { + if constexpr (std::is_same::value) { + char bytes[16] = {0}; + // Big-endian: Hi_ (signed) в первые 8 байт, Low_ (unsigned) в последние 8 байт + for (int i = 0; i < 8; ++i) { + bytes[i] = (data.Hi_ >> (56 - i * 8)) & 0xFF; + bytes[8 + i] = (data.Low_ >> (56 - i * 8)) & 0xFF; + } + Y_ABORT_UNLESS(typedBuilder.Append(bytes).ok()); return true; } } From 143167e85efc8aeff8f4cb91f62dd35eee9289c1 Mon Sep 17 00:00:00 2001 From: Matveev Sergei Date: Mon, 7 Jul 2025 20:37:44 +0000 Subject: [PATCH 5/5] test commit --- ydb/core/kqp/ut/olap/decimal_ut.cpp | 6 +++--- .../test_helper/columnshard_ut_common.h | 8 +++----- ydb/tests/functional/tpc/medium/test_tpch.py | 16 +++++++++++++++- ydb/tests/olap/lib/ydb_cli.py | 12 +++++++++--- ydb/tests/olap/load/lib/conftest.py | 5 ++++- 5 files changed, 34 insertions(+), 13 deletions(-) diff --git a/ydb/core/kqp/ut/olap/decimal_ut.cpp b/ydb/core/kqp/ut/olap/decimal_ut.cpp index 99cb4795ffd2..3e0fb61da7cd 100644 --- a/ydb/core/kqp/ut/olap/decimal_ut.cpp +++ b/ydb/core/kqp/ut/olap/decimal_ut.cpp @@ -244,9 +244,9 @@ Y_UNIT_TEST_SUITE(KqpDecimalColumnShard) { tester35.PrepareTable1(); auto check = [](const TDecimalTestCase& tester) { - tester.CheckQuery("SELECT min(dec) FROM `/Root/Table1`", "[[[\"3.14\"]]]"); - tester.CheckQuery("SELECT max(dec) FROM `/Root/Table1`", "[[[\"12.46\"]]]"); - tester.CheckQuery("SELECT sum(dec) FROM `/Root/Table1`", "[[[\"32.252\"]]]"); + tester.CheckQuery("SELECT min(dec) FROM `/Root/Table1`", "[[[\"3.14\"]]]", EQueryMode::EXECUTE_QUERY); + tester.CheckQuery("SELECT max(dec) FROM `/Root/Table1`", "[[[\"12.46\"]]]", EQueryMode::EXECUTE_QUERY); + tester.CheckQuery("SELECT sum(dec) FROM `/Root/Table1`", "[[[\"32.252\"]]]", EQueryMode::EXECUTE_QUERY); }; check(tester22); diff --git a/ydb/core/tx/columnshard/test_helper/columnshard_ut_common.h b/ydb/core/tx/columnshard/test_helper/columnshard_ut_common.h index 3d5630671932..5a35cd98e4c3 100644 --- a/ydb/core/tx/columnshard/test_helper/columnshard_ut_common.h +++ b/ydb/core/tx/columnshard/test_helper/columnshard_ut_common.h @@ -537,11 +537,9 @@ class TTableUpdatesBuilder { if constexpr (std::is_same::value) { if constexpr (std::is_same::value) { char bytes[16] = {0}; - // Big-endian: Hi_ (signed) в первые 8 байт, Low_ (unsigned) в последние 8 байт - for (int i = 0; i < 8; ++i) { - bytes[i] = (data.Hi_ >> (56 - i * 8)) & 0xFF; - bytes[8 + i] = (data.Low_ >> (56 - i * 8)) & 0xFF; - } + // Little-endian: Low_ (unsigned) в первые 8 байт, Hi_ (signed) в последние 8 байт + std::memcpy(bytes, &data.Low_, 8); + std::memcpy(bytes + 8, &data.Hi_, 8); Y_ABORT_UNLESS(typedBuilder.Append(bytes).ok()); return true; } diff --git a/ydb/tests/functional/tpc/medium/test_tpch.py b/ydb/tests/functional/tpc/medium/test_tpch.py index 4dd5f0bf0e9b..1c9ab637973d 100644 --- a/ydb/tests/functional/tpc/medium/test_tpch.py +++ b/ydb/tests/functional/tpc/medium/test_tpch.py @@ -5,9 +5,23 @@ class TestTpchS1(tpch.TestTpch1, FunctionalTestBase): iterations: int = 1 + @classmethod + def addition_init_params(cls) -> list[str]: + if cls.float_mode: + return ['--float-mode', cls.float_mode] + return [] + @classmethod def setup_class(cls) -> None: cls.setup_cluster() - cls.run_cli(['workload', 'tpch', '-p', 'olap_yatests/tpch/s1', 'init', '--store=column']) + cls.run_cli(['workload', 'tpch', '-p', 'olap_yatests/tpch/s1', 'init', '--store=row'] + cls.addition_init_params()) cls.run_cli(['workload', 'tpch', '-p', 'olap_yatests/tpch/s1', 'import', 'generator', '--scale=1']) tpch.TestTpch1.setup_class() + + +class TestTpchS1Decimal_22_9(TestTpchS1): + float_mode = 'decimal_ydb' + + +class TestTpchS1DecimalNative(TestTpchS1): + float_mode = 'decimal' \ No newline at end of file diff --git a/ydb/tests/olap/lib/ydb_cli.py b/ydb/tests/olap/lib/ydb_cli.py index 353d25f07419..ab842c02fb4d 100644 --- a/ydb/tests/olap/lib/ydb_cli.py +++ b/ydb/tests/olap/lib/ydb_cli.py @@ -152,7 +152,9 @@ def __init__(self, scale: Optional[int], query_prefix: Optional[str], external_path: str, - threads: int): + threads: int, + float_mode: str, + ): self.result = YdbCliHelper.WorkloadRunResult() self.iterations = iterations self.check_canonical = check_canonical @@ -172,6 +174,7 @@ def __init__(self, self.__plan_path = f'{self.__prefix}.plan' self.__query_output_path = f'{self.__prefix}.result' self.json_path = f'{self.__prefix}.stats.json' + self.float_mode = float_mode def get_plan_path(self, query_name: str, plan_name: Any) -> str: return f'{self.__plan_path}.{query_name}.{plan_name}' @@ -204,6 +207,8 @@ def __get_cmd(self) -> list[str]: cmd += ['--scale', str(self.scale)] if self.threads > 0: cmd += ['--threads', str(self.threads)] + if self.float_mode: + cmd += ['--float-mode', self.float_mode] return cmd def run(self) -> YdbCliHelper.WorkloadRunResult: @@ -343,7 +348,7 @@ def __process(self) -> YdbCliHelper.WorkloadRunResult: @staticmethod def workload_run(workload_type: WorkloadType, path: str, query_names: list[str], iterations: int = 5, timeout: float = 100., check_canonical: CheckCanonicalPolicy = CheckCanonicalPolicy.NO, query_syntax: str = '', - scale: Optional[int] = None, query_prefix=None, external_path='', threads: int = 0) -> dict[str, YdbCliHelper.WorkloadRunResult]: + scale: Optional[int] = None, query_prefix=None, external_path='', threads: int = 0, float_mode: str = '') -> dict[str, YdbCliHelper.WorkloadRunResult]: runner = YdbCliHelper.WorkloadRunner( workload_type, path, @@ -355,7 +360,8 @@ def workload_run(workload_type: WorkloadType, path: str, query_names: list[str], scale, query_prefix=query_prefix, external_path=external_path, - threads=threads + threads=threads, + float_mode=float_mode, ) extended_query_names = query_names + ["Sum", "Avg", "GAvg"] if runner.run(): diff --git a/ydb/tests/olap/load/lib/conftest.py b/ydb/tests/olap/load/lib/conftest.py index 21f838cdbd91..b3dc07caf80b 100644 --- a/ydb/tests/olap/load/lib/conftest.py +++ b/ydb/tests/olap/load/lib/conftest.py @@ -38,6 +38,7 @@ def __init__(self, iterations: Optional[int] = None, timeout: Optional[float] = scale: Optional[int] = None query_prefix: str = get_external_param('query-prefix', '') verify_data: bool = True + float_mode: str = '' __nodes_state: Optional[dict[str, YdbCluster.Node]] = None @classmethod @@ -465,6 +466,7 @@ def run_workload_test(self, path: str, query_num: Optional[int] = None, query_na scale=self.scale, query_prefix=qparams.query_prefix, external_path=self.get_external_path(), + float_mode=self.float_mode, )[query_name] self.process_query_result(result, query_name, True) @@ -827,7 +829,8 @@ def do_setup_class(cls): scale=cls.scale, query_prefix=qparams.query_prefix, external_path=cls.get_external_path(), - threads=cls.threads + threads=cls.threads, + float_mode=cls.float_mode, ) def test(self, query_name):