From e48b417cc937221a06458d6c4560ba1dc2a9cf03 Mon Sep 17 00:00:00 2001 From: Vladislav Artiukhov Date: Fri, 18 Apr 2025 09:16:28 +0000 Subject: [PATCH 01/35] feat: allocate Ydb::Value on arena if provided in BuildList --- ydb/public/lib/ydb_cli/common/csv_parser.cpp | 94 +++++++++++++------ ydb/public/lib/ydb_cli/common/csv_parser.h | 15 ++- .../lib/ydb_cli/common/csv_parser_ut.cpp | 6 +- ydb/public/lib/ydb_cli/import/import.cpp | 6 +- 4 files changed, 86 insertions(+), 35 deletions(-) diff --git a/ydb/public/lib/ydb_cli/common/csv_parser.cpp b/ydb/public/lib/ydb_cli/common/csv_parser.cpp index 9b99bbfa6d1f..da7585ab1026 100644 --- a/ydb/public/lib/ydb_cli/common/csv_parser.cpp +++ b/ydb/public/lib/ydb_cli/common/csv_parser.cpp @@ -165,7 +165,7 @@ class TCsvToYdbConverter { } case EPrimitiveType::Interval64: Builder.Interval64(GetArithmetic(token)); - break; + break; case EPrimitiveType::TzDate: Builder.TzDate(token); break; @@ -470,7 +470,7 @@ TStringBuf Consume(NCsvFormat::CsvSplitter& splitter, TCsvParser::TCsvParser(TString&& headerRow, const char delimeter, const std::optional& nullValue, const std::map* destinationTypes, - const std::map* paramSources) + const std::map* paramSources) : HeaderRow(std::move(headerRow)) , Delimeter(delimeter) , NullValue(nullValue) @@ -483,7 +483,7 @@ TCsvParser::TCsvParser(TString&& headerRow, const char delimeter, const std::opt TCsvParser::TCsvParser(TVector&& header, const char delimeter, const std::optional& nullValue, const std::map* destinationTypes, - const std::map* paramSources) + const std::map* paramSources) : Header(std::move(header)) , Delimeter(delimeter) , NullValue(nullValue) @@ -558,41 +558,76 @@ void TCsvParser::BuildValue(TString& data, TValueBuilder& builder, const TType& builder.EndStruct(); } -TValue TCsvParser::BuildList(std::vector& lines, const TString& filename, std::optional row) const { +TValue TCsvParser::BuildList(const std::vector& lines, const TString& filename, std::optional row, google::protobuf::Arena* arena) const { std::vector> columnTypeParsers; columnTypeParsers.reserve(ResultColumnCount); for (const TType* type : ResultLineTypesSorted) { columnTypeParsers.push_back(std::make_unique(*type)); } - - Ydb::Value listValue; - auto* listItems = listValue.mutable_items(); - listItems->Reserve(lines.size()); - for (auto& line : lines) { - NCsvFormat::CsvSplitter splitter(line, Delimeter); - TParseMetadata meta {row, filename}; - auto* structItems = listItems->Add()->mutable_items(); - structItems->Reserve(ResultColumnCount); - auto headerIt = Header.cbegin(); - auto skipIt = SkipBitMap.begin(); - auto typeParserIt = columnTypeParsers.begin(); - do { - if (headerIt == Header.cend()) { // SkipBitMap has same size as Header - throw FormatError(yexception() << "Header contains less fields than data. Header: \"" << HeaderRow << "\", data: \"" << line << "\"", meta); + + if (arena) { + // Arena allocation path + Ydb::Value* valuePtr = google::protobuf::Arena::CreateMessage(arena); + auto* listItems = valuePtr->mutable_items(); + listItems->Reserve(lines.size()); + + for (const auto& line : lines) { + ProcessCsvLine(line, listItems, columnTypeParsers, row, filename); + if (row.has_value()) { + ++row.value(); } - TStringBuf nextField = Consume(splitter, meta, *headerIt); - if (!*skipIt) { - *structItems->Add() = FieldToValue(*typeParserIt->get(), nextField, NullValue, meta, *headerIt).GetProto(); - ++typeParserIt; + } + + // Return a TValue that references the arena-allocated message + return TValue(ResultListType.value(), std::move(*valuePtr)); + } else { + // Original path with local value object + Ydb::Value valueProto; + auto* listItems = valueProto.mutable_items(); + listItems->Reserve(lines.size()); + + for (const auto& line : lines) { + ProcessCsvLine(line, listItems, columnTypeParsers, row, filename); + if (row.has_value()) { + ++row.value(); } - ++headerIt; - ++skipIt; - } while (splitter.Step()); - if (row.has_value()) { - ++row.value(); } + + // Return a TValue that takes ownership via move + return TValue(ResultListType.value(), std::move(valueProto)); } - return TValue(ResultListType.value(), std::move(listValue)); +} + +// Helper method to process a single CSV line +void TCsvParser::ProcessCsvLine( + const TString& line, + google::protobuf::RepeatedPtrField* listItems, + const std::vector>& columnTypeParsers, + std::optional currentRow, + const TString& filename) const { + + NCsvFormat::CsvSplitter splitter(line, Delimeter); + auto* structItems = listItems->Add()->mutable_items(); + structItems->Reserve(ResultColumnCount); + + const TParseMetadata meta {currentRow, filename}; + + auto headerIt = Header.cbegin(); + auto skipIt = SkipBitMap.begin(); + auto typeParserIt = columnTypeParsers.begin(); + + do { + if (headerIt == Header.cend()) { // SkipBitMap has same size as Header + throw FormatError(yexception() << "Header contains less fields than data. Header: \"" << HeaderRow << "\", data: \"" << line << "\"", meta); + } + TStringBuf nextField = Consume(splitter, meta, *headerIt); + if (!*skipIt) { + *structItems->Add() = FieldToValue(*typeParserIt->get(), nextField, NullValue, meta, *headerIt).GetProto(); + ++typeParserIt; + } + ++headerIt; + ++skipIt; + } while (splitter.Step()); } void TCsvParser::BuildLineType() { @@ -763,3 +798,4 @@ const TVector& TCsvParser::GetHeader() { } } + diff --git a/ydb/public/lib/ydb_cli/common/csv_parser.h b/ydb/public/lib/ydb_cli/common/csv_parser.h index f8ade2b8e9f6..7c89c7f7be39 100644 --- a/ydb/public/lib/ydb_cli/common/csv_parser.h +++ b/ydb/public/lib/ydb_cli/common/csv_parser.h @@ -1,6 +1,8 @@ #pragma once #include +#include +#include #include @@ -66,8 +68,9 @@ class TCsvParser { void BuildParams(TString& data, TParamsBuilder& builder, const TParseMetadata& meta) const; void BuildValue(TString& data, TValueBuilder& builder, const TType& type, const TParseMetadata& meta) const; - TValue BuildList(std::vector& lines, const TString& filename, - std::optional row = std::nullopt) const; + TValue BuildList(const std::vector& lines, const TString& filename, + std::optional row = std::nullopt, + google::protobuf::Arena* arena = nullptr) const; void BuildLineType(); const TVector& GetHeader(); void ParseLineTypes(TString& line, TPossibleTypes& possibleTypes, const TParseMetadata& meta); @@ -92,6 +95,14 @@ class TCsvParser { // Types of columns in a single row in resulting TValue. // Column order according to the header, though can have less elements than the Header std::vector ResultLineTypesSorted; + + // Helper method to process a single line of CSV data + void ProcessCsvLine( + const TString& line, + google::protobuf::RepeatedPtrField* listItems, + const std::vector>& columnTypeParsers, + std::optional currentRow, + const TString& filename) const; }; } diff --git a/ydb/public/lib/ydb_cli/common/csv_parser_ut.cpp b/ydb/public/lib/ydb_cli/common/csv_parser_ut.cpp index ffe3de88bf27..2b6e8b60b8a7 100644 --- a/ydb/public/lib/ydb_cli/common/csv_parser_ut.cpp +++ b/ydb/public/lib/ydb_cli/common/csv_parser_ut.cpp @@ -54,7 +54,7 @@ Y_UNIT_TEST_SUITE(YdbCliCsvParserTests) { TCsvParser parser(std::move(header), ',', "", &columnTypes, nullptr); parser.BuildLineType(); - TValue builtResult = parser.BuildList(data, "testFile.csv", 0); + TValue builtResult = parser.BuildList(data, "testFile.csv", 0, nullptr); AssertValuesEqual(builtResult, estimatedResult); } @@ -317,7 +317,7 @@ Y_UNIT_TEST_SUITE(YdbCliCsvParserTests) { {"col2", TTypeBuilder().BeginOptional().Primitive(EPrimitiveType::Int64).EndOptional().Build()}, {"col3", TTypeBuilder().Primitive(EPrimitiveType::Bool).Build()}, }; - + TString csvHeader = "col4,col3,col5,col1,col6"; std::vector data = { "col4 unused value,true,col5 unused value,col1 value,col6 unused value" @@ -325,7 +325,7 @@ Y_UNIT_TEST_SUITE(YdbCliCsvParserTests) { TCsvParser parser(std::move(csvHeader), ',', "", &tableColumnTypes, nullptr); parser.BuildLineType(); - TValue builtResult = parser.BuildList(data, "testFile.csv", 0); + TValue builtResult = parser.BuildList(data, "testFile.csv", 0, nullptr); TValue expexctedResult = TValueBuilder().BeginList() .AddListItem().BeginStruct() diff --git a/ydb/public/lib/ydb_cli/import/import.cpp b/ydb/public/lib/ydb_cli/import/import.cpp index 9ade22b16fa9..8caba55cb0da 100644 --- a/ydb/public/lib/ydb_cli/import/import.cpp +++ b/ydb/public/lib/ydb_cli/import/import.cpp @@ -11,6 +11,8 @@ #include #include +#include + #include #include #include @@ -50,6 +52,7 @@ #include #endif +#include namespace NYdb { namespace NConsoleClient { @@ -1076,6 +1079,7 @@ TStatus TImportFileClient::TImpl::UpsertCsv(IInputStream& input, throw; } }; + UpsertTValueBuffer(dbPath, std::move(buildFunc)) .Apply([&, batchStatus](const TAsyncStatus& asyncStatus) { jobInflightManager->ReleaseJob(); @@ -1221,7 +1225,7 @@ TStatus TImportFileClient::TImpl::UpsertCsvByBlocks(const TString& filePath, auto upsertCsvFunc = [&](std::vector&& buffer) { auto buildFunc = [&jobsInflight, &parser, buffer = std::move(buffer), &filePath, this]() mutable { try { - return parser.BuildList(buffer, filePath); + return parser.BuildList(buffer, filePath, std::nullopt); } catch (const std::exception& e) { if (!Failed.exchange(true)) { ErrorStatus = MakeHolder(MakeStatus(EStatus::INTERNAL_ERROR, e.what())); From 54b94eb89b57f9b8b7a367ac16b7f770777d83fa Mon Sep 17 00:00:00 2001 From: Vladislav Artiukhov Date: Fri, 18 Apr 2025 11:20:39 +0000 Subject: [PATCH 02/35] feat: move protos into request in TTableClient::TImpl::BulkUpsert Because `TValue&& rows` is provided as rvalue reference, we can move its value into the request rather than making a copy of the data. Speed boost: ~130MiB/s -> ~168-200MiB/s. --- .../sdk/cpp/include/ydb-cpp-sdk/client/value/value.h | 6 +++++- ydb/public/sdk/cpp/src/client/table/impl/table_client.cpp | 4 ++-- ydb/public/sdk/cpp/src/client/value/value.cpp | 8 ++++++++ 3 files changed, 15 insertions(+), 3 deletions(-) diff --git a/ydb/public/sdk/cpp/include/ydb-cpp-sdk/client/value/value.h b/ydb/public/sdk/cpp/include/ydb-cpp-sdk/client/value/value.h index 61eb9ce2c0f2..1dec23870753 100644 --- a/ydb/public/sdk/cpp/include/ydb-cpp-sdk/client/value/value.h +++ b/ydb/public/sdk/cpp/include/ydb-cpp-sdk/client/value/value.h @@ -29,6 +29,8 @@ class TType { const Ydb::Type& GetProto() const; Ydb::Type& GetProto(); + Ydb::Type ExtractProto() &&; + private: class TImpl; std::shared_ptr Impl_; @@ -271,11 +273,13 @@ class TValue { TValue(const TType& type, Ydb::Value&& valueProto); const TType& GetType() const; - TType & GetType(); + TType& GetType(); const Ydb::Value& GetProto() const; Ydb::Value& GetProto(); + Ydb::Value ExtractProto() &&; + private: class TImpl; std::shared_ptr Impl_; diff --git a/ydb/public/sdk/cpp/src/client/table/impl/table_client.cpp b/ydb/public/sdk/cpp/src/client/table/impl/table_client.cpp index 64fa171dd0a3..2dadf2bef0e1 100644 --- a/ydb/public/sdk/cpp/src/client/table/impl/table_client.cpp +++ b/ydb/public/sdk/cpp/src/client/table/impl/table_client.cpp @@ -992,8 +992,8 @@ void TTableClient::TImpl::SetStatCollector(const NSdkStats::TStatCollector::TCli TAsyncBulkUpsertResult TTableClient::TImpl::BulkUpsert(const std::string& table, TValue&& rows, const TBulkUpsertSettings& settings) { auto request = MakeOperationRequest(settings); request.set_table(TStringType{table}); - *request.mutable_rows()->mutable_type() = TProtoAccessor::GetProto(rows.GetType()); - *request.mutable_rows()->mutable_value() = rows.GetProto(); + *request.mutable_rows()->mutable_type() = std::move(rows.GetType()).ExtractProto(); + *request.mutable_rows()->mutable_value() = std::move(rows).ExtractProto(); auto promise = NewPromise(); diff --git a/ydb/public/sdk/cpp/src/client/value/value.cpp b/ydb/public/sdk/cpp/src/client/value/value.cpp index 2516f3a42264..8eefd20b245d 100644 --- a/ydb/public/sdk/cpp/src/client/value/value.cpp +++ b/ydb/public/sdk/cpp/src/client/value/value.cpp @@ -111,6 +111,10 @@ Ydb::Type& TType::GetProto() return Impl_->ProtoType_; } +Ydb::Type TType::ExtractProto() && { + return std::move(Impl_->ProtoType_); +} + //////////////////////////////////////////////////////////////////////////////// class TTypeParser::TImpl { @@ -1080,6 +1084,10 @@ Ydb::Value& TValue::GetProto() { return Impl_->ProtoValue_; } +Ydb::Value TValue::ExtractProto() && { + return std::move(Impl_->ProtoValue_); +} + //////////////////////////////////////////////////////////////////////////////// class TValueParser::TImpl { From 5e748c429b1e58bd2588cfeebada5228b0edf553 Mon Sep 17 00:00:00 2001 From: Vladislav Artiukhov Date: Fri, 18 Apr 2025 14:00:46 +0000 Subject: [PATCH 03/35] feat: extract proto model out of FieldToValue in TCsvParser --- ydb/public/lib/ydb_cli/common/csv_parser.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/ydb/public/lib/ydb_cli/common/csv_parser.cpp b/ydb/public/lib/ydb_cli/common/csv_parser.cpp index da7585ab1026..91ecdf9aa804 100644 --- a/ydb/public/lib/ydb_cli/common/csv_parser.cpp +++ b/ydb/public/lib/ydb_cli/common/csv_parser.cpp @@ -622,7 +622,8 @@ void TCsvParser::ProcessCsvLine( } TStringBuf nextField = Consume(splitter, meta, *headerIt); if (!*skipIt) { - *structItems->Add() = FieldToValue(*typeParserIt->get(), nextField, NullValue, meta, *headerIt).GetProto(); + TValue builtValue = FieldToValue(*typeParserIt->get(), nextField, NullValue, meta, *headerIt); + *structItems->Add() = std::move(builtValue).ExtractProto(); ++typeParserIt; } ++headerIt; From d8df57985bcc9ff39adb990196d211b999ce3833 Mon Sep 17 00:00:00 2001 From: Vladislav Artiukhov Date: Thu, 1 May 2025 13:38:35 +0000 Subject: [PATCH 04/35] feat: add && in return types of ExtractProto methods --- ydb/public/sdk/cpp/include/ydb-cpp-sdk/client/value/value.h | 4 ++-- ydb/public/sdk/cpp/src/client/value/value.cpp | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/ydb/public/sdk/cpp/include/ydb-cpp-sdk/client/value/value.h b/ydb/public/sdk/cpp/include/ydb-cpp-sdk/client/value/value.h index 1dec23870753..bb74c8a5aa1a 100644 --- a/ydb/public/sdk/cpp/include/ydb-cpp-sdk/client/value/value.h +++ b/ydb/public/sdk/cpp/include/ydb-cpp-sdk/client/value/value.h @@ -29,7 +29,7 @@ class TType { const Ydb::Type& GetProto() const; Ydb::Type& GetProto(); - Ydb::Type ExtractProto() &&; + Ydb::Type&& ExtractProto() &&; private: class TImpl; @@ -278,7 +278,7 @@ class TValue { const Ydb::Value& GetProto() const; Ydb::Value& GetProto(); - Ydb::Value ExtractProto() &&; + Ydb::Value&& ExtractProto() &&; private: class TImpl; diff --git a/ydb/public/sdk/cpp/src/client/value/value.cpp b/ydb/public/sdk/cpp/src/client/value/value.cpp index 8eefd20b245d..17f22a43d755 100644 --- a/ydb/public/sdk/cpp/src/client/value/value.cpp +++ b/ydb/public/sdk/cpp/src/client/value/value.cpp @@ -111,7 +111,7 @@ Ydb::Type& TType::GetProto() return Impl_->ProtoType_; } -Ydb::Type TType::ExtractProto() && { +Ydb::Type&& TType::ExtractProto() && { return std::move(Impl_->ProtoType_); } @@ -1084,7 +1084,7 @@ Ydb::Value& TValue::GetProto() { return Impl_->ProtoValue_; } -Ydb::Value TValue::ExtractProto() && { +Ydb::Value&& TValue::ExtractProto() && { return std::move(Impl_->ProtoValue_); } From 277c354d5dd6466dc34611bef62de336575632a8 Mon Sep 17 00:00:00 2001 From: Vladislav Artiukhov Date: Thu, 1 May 2025 13:56:16 +0000 Subject: [PATCH 05/35] feat: move rows' proto into request proto in a separate method BulkUpsertUnretryable (leaving BulkUpsert unchanged) --- ydb/public/lib/ydb_cli/import/import.cpp | 2 +- .../include/ydb-cpp-sdk/client/table/table.h | 7 +++++ .../src/client/table/impl/table_client.cpp | 30 ++++++++++++++++++- .../cpp/src/client/table/impl/table_client.h | 5 ++++ ydb/public/sdk/cpp/src/client/table/table.cpp | 6 ++++ 5 files changed, 48 insertions(+), 2 deletions(-) diff --git a/ydb/public/lib/ydb_cli/import/import.cpp b/ydb/public/lib/ydb_cli/import/import.cpp index 8caba55cb0da..f9ade68b9163 100644 --- a/ydb/public/lib/ydb_cli/import/import.cpp +++ b/ydb/public/lib/ydb_cli/import/import.cpp @@ -973,7 +973,7 @@ TAsyncStatus TImportFileClient::TImpl::UpsertTValueBuffer(const TString& dbPath, // to prevent copying data in retryFunc in a happy way when there is only one request TValue builtValue = prebuiltValue.has_value() ? std::move(prebuiltValue.value()) : buildFunc(); prebuiltValue = std::nullopt; - return tableClient.BulkUpsert(dbPath, std::move(builtValue), UpsertSettings) + return tableClient.BulkUpsertUnretryable(dbPath, std::move(builtValue), UpsertSettings) .Apply([](const NYdb::NTable::TAsyncBulkUpsertResult& bulkUpsertResult) { NYdb::TStatus status = bulkUpsertResult.GetValueSync(); return NThreading::MakeFuture(status); diff --git a/ydb/public/sdk/cpp/include/ydb-cpp-sdk/client/table/table.h b/ydb/public/sdk/cpp/include/ydb-cpp-sdk/client/table/table.h index 01bccbc00c03..8daa2850403b 100644 --- a/ydb/public/sdk/cpp/include/ydb-cpp-sdk/client/table/table.h +++ b/ydb/public/sdk/cpp/include/ydb-cpp-sdk/client/table/table.h @@ -1260,6 +1260,13 @@ class TTableClient { //! Using the client after call this method causes UB NThreading::TFuture Stop(); + /** + * @brief Upsert data into table moving `Ydb::Value` from `rows` into request proto model, + * therefore, it is not retryable with the same `rows` object. + */ + TAsyncBulkUpsertResult BulkUpsertUnretryable(const std::string& table, TValue&& rows, + const TBulkUpsertSettings& settings); + //! Non-transactional fast bulk write. //! Interanlly it uses an implicit session and thus doesn't need a session to be passed. //! "rows" parameter must be a list of structs where each stuct represents one row. diff --git a/ydb/public/sdk/cpp/src/client/table/impl/table_client.cpp b/ydb/public/sdk/cpp/src/client/table/impl/table_client.cpp index 2dadf2bef0e1..bcc955c299d2 100644 --- a/ydb/public/sdk/cpp/src/client/table/impl/table_client.cpp +++ b/ydb/public/sdk/cpp/src/client/table/impl/table_client.cpp @@ -989,7 +989,7 @@ void TTableClient::TImpl::SetStatCollector(const NSdkStats::TStatCollector::TCli SessionRemovedDueBalancing.Set(collector.SessionRemovedDueBalancing); } -TAsyncBulkUpsertResult TTableClient::TImpl::BulkUpsert(const std::string& table, TValue&& rows, const TBulkUpsertSettings& settings) { +TAsyncBulkUpsertResult TTableClient::TImpl::BulkUpsertUnretryable(const std::string& table, TValue&& rows, const TBulkUpsertSettings& settings) { auto request = MakeOperationRequest(settings); request.set_table(TStringType{table}); *request.mutable_rows()->mutable_type() = std::move(rows.GetType()).ExtractProto(); @@ -1015,6 +1015,34 @@ TAsyncBulkUpsertResult TTableClient::TImpl::BulkUpsert(const std::string& table, return promise.GetFuture(); } + +TAsyncBulkUpsertResult TTableClient::TImpl::BulkUpsert(const std::string& table, TValue&& rows, const TBulkUpsertSettings& settings) { + auto request = MakeOperationRequest(settings); + request.set_table(TStringType{table}); + *request.mutable_rows()->mutable_type() = TProtoAccessor::GetProto(rows.GetType()); + *request.mutable_rows()->mutable_value() = rows.GetProto(); + + auto promise = NewPromise(); + + auto extractor = [promise] + (google::protobuf::Any* any, TPlainStatus status) mutable { + Y_UNUSED(any); + TBulkUpsertResult val(TStatus(std::move(status))); + promise.SetValue(std::move(val)); + }; + + Connections_->RunDeferred( + std::move(request), + extractor, + &Ydb::Table::V1::TableService::Stub::AsyncBulkUpsert, + DbDriverState_, + INITIAL_DEFERRED_CALL_DELAY, + TRpcRequestSettings::Make(settings)); + + return promise.GetFuture(); +} + + TAsyncBulkUpsertResult TTableClient::TImpl::BulkUpsert(const std::string& table, EDataFormat format, const std::string& data, const std::string& schema, const TBulkUpsertSettings& settings) { diff --git a/ydb/public/sdk/cpp/src/client/table/impl/table_client.h b/ydb/public/sdk/cpp/src/client/table/impl/table_client.h index b269c24dedb1..49abec6152cb 100644 --- a/ydb/public/sdk/cpp/src/client/table/impl/table_client.h +++ b/ydb/public/sdk/cpp/src/client/table/impl/table_client.h @@ -138,6 +138,11 @@ class TTableClient::TImpl: public TClientImplCommon, public void SetStatCollector(const NSdkStats::TStatCollector::TClientStatCollector& collector); + /** + * @brief Upsert data into table moving `Ydb::Value` from `rows` into request proto model, + * therefore, it is not retryable with the same `rows` object. + */ + TAsyncBulkUpsertResult BulkUpsertUnretryable(const std::string& table, TValue&& rows, const TBulkUpsertSettings& settings); TAsyncBulkUpsertResult BulkUpsert(const std::string& table, TValue&& rows, const TBulkUpsertSettings& settings); TAsyncBulkUpsertResult BulkUpsert(const std::string& table, EDataFormat format, const std::string& data, const std::string& schema, const TBulkUpsertSettings& settings); diff --git a/ydb/public/sdk/cpp/src/client/table/table.cpp b/ydb/public/sdk/cpp/src/client/table/table.cpp index 65b271ddc04d..52d732535f0f 100644 --- a/ydb/public/sdk/cpp/src/client/table/table.cpp +++ b/ydb/public/sdk/cpp/src/client/table/table.cpp @@ -1457,6 +1457,12 @@ NThreading::TFuture TTableClient::Stop() { return Impl_->Stop(); } +TAsyncBulkUpsertResult TTableClient::BulkUpsertUnretryable(const std::string& table, TValue&& rows, + const TBulkUpsertSettings& settings) +{ + return Impl_->BulkUpsertUnretryable(table, std::move(rows), settings); +} + TAsyncBulkUpsertResult TTableClient::BulkUpsert(const std::string& table, TValue&& rows, const TBulkUpsertSettings& settings) { From ffa3fed0e4b32cc5f5ca79ad8a742ee1100aabcd Mon Sep 17 00:00:00 2001 From: Vladislav Artiukhov Date: Thu, 1 May 2025 15:54:34 +0000 Subject: [PATCH 06/35] feat: move line into buffer in UpsertCsv --- ydb/public/lib/ydb_cli/import/import.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ydb/public/lib/ydb_cli/import/import.cpp b/ydb/public/lib/ydb_cli/import/import.cpp index f9ade68b9163..0b5d1d2e3771 100644 --- a/ydb/public/lib/ydb_cli/import/import.cpp +++ b/ydb/public/lib/ydb_cli/import/import.cpp @@ -1119,7 +1119,7 @@ TStatus TImportFileClient::TImpl::UpsertCsv(IInputStream& input, line.erase(line.size() - Settings.Delimiter_.size()); } - buffer.push_back(line); + buffer.push_back(std::move(line)); if (readBytes >= nextReadBorder && Settings.Verbose_) { nextReadBorder += VerboseModeStepSize; From ac9928e1a19be089d0312b7d289cc1c63342b323 Mon Sep 17 00:00:00 2001 From: Vladislav Artiukhov Date: Fri, 18 Apr 2025 09:16:28 +0000 Subject: [PATCH 07/35] feat: introduce per-request arena (allocate Ydb::Value on arena, 1-to-1 allocation) --- ydb/public/lib/ydb_cli/import/import.cpp | 33 +++++++++++++++++++----- 1 file changed, 26 insertions(+), 7 deletions(-) diff --git a/ydb/public/lib/ydb_cli/import/import.cpp b/ydb/public/lib/ydb_cli/import/import.cpp index 0b5d1d2e3771..5b2918ac0a0d 100644 --- a/ydb/public/lib/ydb_cli/import/import.cpp +++ b/ydb/public/lib/ydb_cli/import/import.cpp @@ -304,6 +304,11 @@ class TCsvFileReader { } } + // Get the protobuf arena for allocating messages + // google::protobuf::Arena* GetArena() { + // return &ProtoArena; + // } + TFileChunk& GetChunk(size_t threadId) { if (threadId >= Chunks.size()) { throw yexception() << "File chunk number is too big"; @@ -323,6 +328,7 @@ class TCsvFileReader { TVector Chunks; size_t SplitCount; size_t MaxInFlight; + // google::protobuf::Arena ProtoArena; }; class TJobInFlightManager { @@ -607,6 +613,10 @@ TImportFileClient::TImportFileClient(const TDriver& driver, const TClientCommand } TStatus TImportFileClient::Import(const TVector& filePaths, const TString& dbPath) { + // { + // google::protobuf::Arena arena; + // MyMessage* message = google::protobuf::Arena::Create(&arena); + // } return Impl_->Import(filePaths, dbPath); } @@ -1067,10 +1077,18 @@ TStatus TImportFileClient::TImpl::UpsertCsv(IInputStream& input, } }; - auto upsertCsvFunc = [&](std::vector&& buffer, ui64 row, std::shared_ptr batchStatus) { - auto buildFunc = [&, buffer = std::move(buffer), row, this] () mutable { + auto upsertCsvFunc = [&](std::vector&& buffer, ui64 batchBytes, ui64 row, std::shared_ptr batchStatus) { + // Create an arena per batch that will be shared in the closure + google::protobuf::ArenaOptions options{}; + options.start_block_size = batchBytes; + std::shared_ptr arena = std::make_shared(std::move(options)); + + auto buildFunc = [&, buffer = std::move(buffer), row, arena, this] () mutable { try { - return parser.BuildList(buffer, filePath, row); + // Use the arena for allocating protobuf objects + auto a = parser.BuildList(buffer, filePath, row, arena.get()); + // std::cerr << "row " << row << ": SpaceUsed=" << PrettifyBytes(arena->SpaceUsed()) << ", SpaceAllocated=" << PrettifyBytes(arena->SpaceAllocated()) << std::endl; + return a; } catch (const std::exception& e) { if (!Failed.exchange(true)) { ErrorStatus = MakeHolder(MakeStatus(EStatus::INTERNAL_ERROR, e.what())); @@ -1081,7 +1099,8 @@ TStatus TImportFileClient::TImpl::UpsertCsv(IInputStream& input, }; UpsertTValueBuffer(dbPath, std::move(buildFunc)) - .Apply([&, batchStatus](const TAsyncStatus& asyncStatus) { + // copy arena to the closure to ensure it's not destroyed before the request completes + .Apply([&, batchStatus, arena](const TAsyncStatus& asyncStatus) { jobInflightManager->ReleaseJob(); if (asyncStatus.GetValueSync().IsSuccess()) { batchStatus->Completed = true; @@ -1134,9 +1153,9 @@ TStatus TImportFileClient::TImpl::UpsertCsv(IInputStream& input, progressCallback(skippedBytes + readBytes, *inputSizeHint); } - auto workerFunc = [&upsertCsvFunc, row, buffer = std::move(buffer), + auto workerFunc = [&upsertCsvFunc, batchBytes, row, buffer = std::move(buffer), batchStatus = createStatus(row + batchRows)]() mutable { - upsertCsvFunc(std::move(buffer), row, batchStatus); + upsertCsvFunc(std::move(buffer), batchBytes, row, batchStatus); }; row += batchRows; batchRows = 0; @@ -1157,7 +1176,7 @@ TStatus TImportFileClient::TImpl::UpsertCsv(IInputStream& input, // Send the rest if buffer is not empty if (!buffer.empty() && countInput.Counter() > 0 && !Failed) { jobInflightManager->AcquireJob(); - upsertCsvFunc(std::move(buffer), row, createStatus(row + batchRows)); + upsertCsvFunc(std::move(buffer), batchBytes, row, createStatus(row + batchRows)); } jobInflightManager->WaitForAllJobs(); From df8181068452b17ab08f886f2f638662ef6762d5 Mon Sep 17 00:00:00 2001 From: Vladislav Artiukhov Date: Thu, 1 May 2025 16:06:21 +0000 Subject: [PATCH 08/35] Revert "feat: introduce per-request arena (allocate Ydb::Value on arena, 1-to-1 allocation)" This reverts commit b9bd7cd615768e4854521ff04211a02f67758900. --- ydb/public/lib/ydb_cli/import/import.cpp | 33 +++++------------------- 1 file changed, 7 insertions(+), 26 deletions(-) diff --git a/ydb/public/lib/ydb_cli/import/import.cpp b/ydb/public/lib/ydb_cli/import/import.cpp index 5b2918ac0a0d..0b5d1d2e3771 100644 --- a/ydb/public/lib/ydb_cli/import/import.cpp +++ b/ydb/public/lib/ydb_cli/import/import.cpp @@ -304,11 +304,6 @@ class TCsvFileReader { } } - // Get the protobuf arena for allocating messages - // google::protobuf::Arena* GetArena() { - // return &ProtoArena; - // } - TFileChunk& GetChunk(size_t threadId) { if (threadId >= Chunks.size()) { throw yexception() << "File chunk number is too big"; @@ -328,7 +323,6 @@ class TCsvFileReader { TVector Chunks; size_t SplitCount; size_t MaxInFlight; - // google::protobuf::Arena ProtoArena; }; class TJobInFlightManager { @@ -613,10 +607,6 @@ TImportFileClient::TImportFileClient(const TDriver& driver, const TClientCommand } TStatus TImportFileClient::Import(const TVector& filePaths, const TString& dbPath) { - // { - // google::protobuf::Arena arena; - // MyMessage* message = google::protobuf::Arena::Create(&arena); - // } return Impl_->Import(filePaths, dbPath); } @@ -1077,18 +1067,10 @@ TStatus TImportFileClient::TImpl::UpsertCsv(IInputStream& input, } }; - auto upsertCsvFunc = [&](std::vector&& buffer, ui64 batchBytes, ui64 row, std::shared_ptr batchStatus) { - // Create an arena per batch that will be shared in the closure - google::protobuf::ArenaOptions options{}; - options.start_block_size = batchBytes; - std::shared_ptr arena = std::make_shared(std::move(options)); - - auto buildFunc = [&, buffer = std::move(buffer), row, arena, this] () mutable { + auto upsertCsvFunc = [&](std::vector&& buffer, ui64 row, std::shared_ptr batchStatus) { + auto buildFunc = [&, buffer = std::move(buffer), row, this] () mutable { try { - // Use the arena for allocating protobuf objects - auto a = parser.BuildList(buffer, filePath, row, arena.get()); - // std::cerr << "row " << row << ": SpaceUsed=" << PrettifyBytes(arena->SpaceUsed()) << ", SpaceAllocated=" << PrettifyBytes(arena->SpaceAllocated()) << std::endl; - return a; + return parser.BuildList(buffer, filePath, row); } catch (const std::exception& e) { if (!Failed.exchange(true)) { ErrorStatus = MakeHolder(MakeStatus(EStatus::INTERNAL_ERROR, e.what())); @@ -1099,8 +1081,7 @@ TStatus TImportFileClient::TImpl::UpsertCsv(IInputStream& input, }; UpsertTValueBuffer(dbPath, std::move(buildFunc)) - // copy arena to the closure to ensure it's not destroyed before the request completes - .Apply([&, batchStatus, arena](const TAsyncStatus& asyncStatus) { + .Apply([&, batchStatus](const TAsyncStatus& asyncStatus) { jobInflightManager->ReleaseJob(); if (asyncStatus.GetValueSync().IsSuccess()) { batchStatus->Completed = true; @@ -1153,9 +1134,9 @@ TStatus TImportFileClient::TImpl::UpsertCsv(IInputStream& input, progressCallback(skippedBytes + readBytes, *inputSizeHint); } - auto workerFunc = [&upsertCsvFunc, batchBytes, row, buffer = std::move(buffer), + auto workerFunc = [&upsertCsvFunc, row, buffer = std::move(buffer), batchStatus = createStatus(row + batchRows)]() mutable { - upsertCsvFunc(std::move(buffer), batchBytes, row, batchStatus); + upsertCsvFunc(std::move(buffer), row, batchStatus); }; row += batchRows; batchRows = 0; @@ -1176,7 +1157,7 @@ TStatus TImportFileClient::TImpl::UpsertCsv(IInputStream& input, // Send the rest if buffer is not empty if (!buffer.empty() && countInput.Counter() > 0 && !Failed) { jobInflightManager->AcquireJob(); - upsertCsvFunc(std::move(buffer), batchBytes, row, createStatus(row + batchRows)); + upsertCsvFunc(std::move(buffer), row, createStatus(row + batchRows)); } jobInflightManager->WaitForAllJobs(); From 1612ff6456dbea9062ba150279a5fd1b6a0eb630 Mon Sep 17 00:00:00 2001 From: Vladislav Artiukhov Date: Thu, 1 May 2025 19:52:53 +0000 Subject: [PATCH 09/35] feat: implement single-arena-per-request strategy Implement additional APIs that work with arena-allocated proto messages. The implementation creates a new protobuf::Arena per batch request and builds Ydb::Value inside this arena inside TCsvParser::BuildListOnArena. Ydb::Value gets created once and then moved into Ydb::Table::BulkUpsertRequest, which is allocated on the same arena (allocation in the same arena prevents copying). Left undone/requires modifications: 1. TODO: I had to copy-paste RunDeferred/Run methods (see: RunDeferredOnArena/RunOnArena) because arena-allocated messages are returned as pointers, but the API expected rvalue/lvalue references on the request message. If the pointer is deferefenced, there will be a single copy of the message inside TGRpcConnectionsImpl::Run when WithServiceConnection is called and request is moved into capture parameter of a callback. In the best case, there must be no code duplication (the copy-pasted methods differ only in the type of accepted request: they accept TRequest*). 2. TODO: ensure correctness. Speed boost: ~130MiB/s -> ~250MiB/s --- ydb/public/lib/ydb_cli/common/csv_parser.cpp | 70 +++-- ydb/public/lib/ydb_cli/common/csv_parser.h | 12 +- ydb/public/lib/ydb_cli/import/import.cpp | 85 +++++- .../include/ydb-cpp-sdk/client/table/table.h | 7 + .../grpc_connections/grpc_connections.h | 246 +++++++++++++++++- .../impl/ydb_internal/make_request/make.h | 14 + .../src/client/table/impl/table_client.cpp | 43 +++ .../cpp/src/client/table/impl/table_client.h | 8 + ydb/public/sdk/cpp/src/client/table/table.cpp | 8 + 9 files changed, 458 insertions(+), 35 deletions(-) diff --git a/ydb/public/lib/ydb_cli/common/csv_parser.cpp b/ydb/public/lib/ydb_cli/common/csv_parser.cpp index 91ecdf9aa804..4ebbd42de08b 100644 --- a/ydb/public/lib/ydb_cli/common/csv_parser.cpp +++ b/ydb/public/lib/ydb_cli/common/csv_parser.cpp @@ -558,46 +558,64 @@ void TCsvParser::BuildValue(TString& data, TValueBuilder& builder, const TType& builder.EndStruct(); } -TValue TCsvParser::BuildList(const std::vector& lines, const TString& filename, std::optional row, google::protobuf::Arena* arena) const { +TValue TCsvParser::BuildList(const std::vector& lines, const TString& filename, std::optional row) const { std::vector> columnTypeParsers; columnTypeParsers.reserve(ResultColumnCount); for (const TType* type : ResultLineTypesSorted) { columnTypeParsers.push_back(std::make_unique(*type)); } - if (arena) { - // Arena allocation path - Ydb::Value* valuePtr = google::protobuf::Arena::CreateMessage(arena); - auto* listItems = valuePtr->mutable_items(); - listItems->Reserve(lines.size()); + // Original path with local value object + Ydb::Value valueProto; + auto* listItems = valueProto.mutable_items(); + listItems->Reserve(lines.size()); - for (const auto& line : lines) { - ProcessCsvLine(line, listItems, columnTypeParsers, row, filename); - if (row.has_value()) { - ++row.value(); - } + for (const auto& line : lines) { + ProcessCsvLine(line, listItems, columnTypeParsers, row, filename); + if (row.has_value()) { + ++row.value(); } + } - // Return a TValue that references the arena-allocated message - return TValue(ResultListType.value(), std::move(*valuePtr)); - } else { - // Original path with local value object - Ydb::Value valueProto; - auto* listItems = valueProto.mutable_items(); - listItems->Reserve(lines.size()); + // Return a TValue that takes ownership via move + return TValue(ResultListType.value(), std::move(valueProto)); +} - for (const auto& line : lines) { - ProcessCsvLine(line, listItems, columnTypeParsers, row, filename); - if (row.has_value()) { - ++row.value(); - } - } - // Return a TValue that takes ownership via move - return TValue(ResultListType.value(), std::move(valueProto)); + +// TODO: std::pair -> ArenaAllocatedTValue +std::pair TCsvParser::BuildListOnArena( + const std::vector& lines, + const TString& filename, + google::protobuf::Arena* arena, + std::optional row +) const { + assert(arena != nullptr); + + std::vector> columnTypeParsers; + columnTypeParsers.reserve(ResultColumnCount); + for (const TType* type : ResultLineTypesSorted) { + columnTypeParsers.push_back(std::make_unique(*type)); + } + + // allocate Ydb::Value on arena + Ydb::Value* value = google::protobuf::Arena::CreateMessage(arena); + auto* listItems = value->mutable_items(); + listItems->Reserve(lines.size()); + + for (const auto& line : lines) { + ProcessCsvLine(line, listItems, columnTypeParsers, row, filename); + if (row.has_value()) { + ++row.value(); + } } + + // Return a TValue that references the arena-allocated message + return std::make_pair(ResultListType.value(), value); } + + // Helper method to process a single CSV line void TCsvParser::ProcessCsvLine( const TString& line, diff --git a/ydb/public/lib/ydb_cli/common/csv_parser.h b/ydb/public/lib/ydb_cli/common/csv_parser.h index 7c89c7f7be39..69194ba366f3 100644 --- a/ydb/public/lib/ydb_cli/common/csv_parser.h +++ b/ydb/public/lib/ydb_cli/common/csv_parser.h @@ -68,9 +68,17 @@ class TCsvParser { void BuildParams(TString& data, TParamsBuilder& builder, const TParseMetadata& meta) const; void BuildValue(TString& data, TValueBuilder& builder, const TType& type, const TParseMetadata& meta) const; + TValue BuildList(const std::vector& lines, const TString& filename, - std::optional row = std::nullopt, - google::protobuf::Arena* arena = nullptr) const; + std::optional row = std::nullopt) const; + + std::pair BuildListOnArena( + const std::vector& lines, + const TString& filename, + google::protobuf::Arena* arena, + std::optional row = std::nullopt + ) const; + void BuildLineType(); const TVector& GetHeader(); void ParseLineTypes(TString& line, TPossibleTypes& possibleTypes, const TParseMetadata& meta); diff --git a/ydb/public/lib/ydb_cli/import/import.cpp b/ydb/public/lib/ydb_cli/import/import.cpp index 0b5d1d2e3771..9a432c4356ca 100644 --- a/ydb/public/lib/ydb_cli/import/import.cpp +++ b/ydb/public/lib/ydb_cli/import/import.cpp @@ -560,6 +560,10 @@ class TImportFileClient::TImpl { std::shared_ptr progressFile); TAsyncStatus UpsertTValueBuffer(const TString& dbPath, TValueBuilder& builder); TAsyncStatus UpsertTValueBuffer(const TString& dbPath, std::function&& buildFunc); + + TAsyncStatus UpsertTValueBufferOnArena( + const TString& dbPath, std::function(google::protobuf::Arena*)>&& buildFunc); + TStatus UpsertJson(IInputStream &input, const TString &dbPath, std::optional inputSizeHint, ProgressCallbackFunc & progressCallback); TStatus UpsertParquet(const TString& filename, const TString& dbPath, ProgressCallbackFunc & progressCallback); @@ -966,6 +970,7 @@ inline TAsyncStatus TImportFileClient::TImpl::UpsertTValueBuffer(const TString& dbPath, std::function&& buildFunc) { // For the first attempt values are built before acquiring request inflight semaphore std::optional prebuiltValue = buildFunc(); + auto retryFunc = [this, &dbPath, buildFunc = std::move(buildFunc), prebuiltValue = std::move(prebuiltValue)] (NYdb::NTable::TTableClient& tableClient) mutable -> TAsyncStatus { auto buildTValueAndSendRequest = [this, &buildFunc, &dbPath, &tableClient, &prebuiltValue]() { @@ -1006,6 +1011,67 @@ TAsyncStatus TImportFileClient::TImpl::UpsertTValueBuffer(const TString& dbPath, }); } + + + + +// TODO: std::pair -> ArenaAllocatedTValue +inline +TAsyncStatus TImportFileClient::TImpl::UpsertTValueBufferOnArena( + const TString& dbPath, std::function(google::protobuf::Arena*)>&& buildFunc) { + using YdbValuePair = std::pair; + auto arena = std::make_shared(); + + // For the first attempt values are built before acquiring request inflight semaphore + std::optional prebuiltValue = buildFunc(arena.get()); + + auto retryFunc = [this, &dbPath, buildFunc = std::move(buildFunc), + prebuiltValue = std::move(prebuiltValue), arena = std::move(arena)] + (NYdb::NTable::TTableClient& tableClient) mutable -> TAsyncStatus { + auto buildTValueAndSendRequest = [this, &buildFunc, &dbPath, &tableClient, &prebuiltValue, arena]() { + // For every retry attempt after first request build value from strings again + // to prevent copying data in retryFunc in a happy way when there is only one request + YdbValuePair builtValue = prebuiltValue.has_value() ? std::move(prebuiltValue.value()) : buildFunc(arena.get()); + prebuiltValue = std::nullopt; + return tableClient.BulkUpsertUnretryableArenaAllocated( + dbPath, std::move(builtValue), arena.get(), UpsertSettings) + .Apply([](const NYdb::NTable::TAsyncBulkUpsertResult& bulkUpsertResult) { + NYdb::TStatus status = bulkUpsertResult.GetValueSync(); + return NThreading::MakeFuture(status); + }); + }; + // Running heavy building task on processing pool: + return NThreading::Async(std::move(buildTValueAndSendRequest), *ProcessingPool); + }; + if (!RequestsInflight->try_acquire()) { + if (Settings.Verbose_ && Settings.NewlineDelimited_) { + if (!InformedAboutLimit.exchange(true)) { + Cerr << (TStringBuilder() << "@ (each '@' means max request inflight is reached and a worker thread is waiting for " + "any response from database)" << Endl); + } else { + Cerr << '@'; + } + } + RequestsInflight->acquire(); + } + return TableClient->RetryOperation(std::move(retryFunc), RetrySettings) + .Apply([this](const TAsyncStatus& asyncStatus) { + NYdb::TStatus status = asyncStatus.GetValueSync(); + if (!status.IsSuccess()) { + if (!Failed.exchange(true)) { + ErrorStatus = MakeHolder(status); + } + } + RequestsInflight->release(); + return asyncStatus; + }); +} + + + + + + TStatus TImportFileClient::TImpl::UpsertCsv(IInputStream& input, const TString& dbPath, const TString& filePath, @@ -1068,9 +1134,21 @@ TStatus TImportFileClient::TImpl::UpsertCsv(IInputStream& input, }; auto upsertCsvFunc = [&](std::vector&& buffer, ui64 row, std::shared_ptr batchStatus) { - auto buildFunc = [&, buffer = std::move(buffer), row, this] () mutable { + // auto buildFunc = [&, buffer = std::move(buffer), row, this] () mutable { + // try { + // return parser.BuildList(buffer, filePath, row); + // } catch (const std::exception& e) { + // if (!Failed.exchange(true)) { + // ErrorStatus = MakeHolder(MakeStatus(EStatus::INTERNAL_ERROR, e.what())); + // } + // jobInflightManager->ReleaseJob(); + // throw; + // } + // }; + + auto buildOnArenaFunc = [&, buffer = std::move(buffer), row, this] (google::protobuf::Arena* arena) mutable { try { - return parser.BuildList(buffer, filePath, row); + return parser.BuildListOnArena(buffer, filePath, arena, row); } catch (const std::exception& e) { if (!Failed.exchange(true)) { ErrorStatus = MakeHolder(MakeStatus(EStatus::INTERNAL_ERROR, e.what())); @@ -1080,7 +1158,8 @@ TStatus TImportFileClient::TImpl::UpsertCsv(IInputStream& input, } }; - UpsertTValueBuffer(dbPath, std::move(buildFunc)) + // UpsertTValueBuffer(dbPath, std::move(buildFunc)) + UpsertTValueBufferOnArena(dbPath, std::move(buildOnArenaFunc)) .Apply([&, batchStatus](const TAsyncStatus& asyncStatus) { jobInflightManager->ReleaseJob(); if (asyncStatus.GetValueSync().IsSuccess()) { diff --git a/ydb/public/sdk/cpp/include/ydb-cpp-sdk/client/table/table.h b/ydb/public/sdk/cpp/include/ydb-cpp-sdk/client/table/table.h index 8daa2850403b..65e84b17aed1 100644 --- a/ydb/public/sdk/cpp/include/ydb-cpp-sdk/client/table/table.h +++ b/ydb/public/sdk/cpp/include/ydb-cpp-sdk/client/table/table.h @@ -1267,6 +1267,13 @@ class TTableClient { TAsyncBulkUpsertResult BulkUpsertUnretryable(const std::string& table, TValue&& rows, const TBulkUpsertSettings& settings); + TAsyncBulkUpsertResult BulkUpsertUnretryableArenaAllocated( + const std::string& table, + std::pair&& rows, + google::protobuf::Arena* arena, + const TBulkUpsertSettings& settings + ); + //! Non-transactional fast bulk write. //! Interanlly it uses an implicit session and thus doesn't need a session to be passed. //! "rows" parameter must be a list of structs where each stuct represents one row. diff --git a/ydb/public/sdk/cpp/src/client/impl/ydb_internal/grpc_connections/grpc_connections.h b/ydb/public/sdk/cpp/src/client/impl/ydb_internal/grpc_connections/grpc_connections.h index f1a23631c94e..4a457de8642a 100644 --- a/ydb/public/sdk/cpp/src/client/impl/ydb_internal/grpc_connections/grpc_connections.h +++ b/ydb/public/sdk/cpp/src/client/impl/ydb_internal/grpc_connections/grpc_connections.h @@ -174,7 +174,7 @@ class TGRpcConnectionsImpl } WithServiceConnection( - [this, request = std::move(request), userResponseCb = std::move(userResponseCb), rpc, requestSettings, context = std::move(context), dbState] + [this, request = std::forward(request), userResponseCb = std::move(userResponseCb), rpc, requestSettings, context = std::move(context), dbState] (TPlainStatus status, TConnection serviceConnection, TEndpointKey endpoint) mutable -> void { if (!status.Ok()) { userResponseCb( @@ -193,7 +193,7 @@ class TGRpcConnectionsImpl meta.Aux.push_back({ YDB_AUTH_TICKET_HEADER, GetAuthInfo(dbState) }); } catch (const std::exception& e) { userResponseCb( - nullptr, + nullptr, TPlainStatus( EStatus::CLIENT_UNAUTHENTICATED, TStringBuilder() << "Can't get Authentication info from CredentialsProvider. " << e.what() @@ -276,6 +276,150 @@ class TGRpcConnectionsImpl }, dbState, requestSettings.PreferredEndpoint, requestSettings.EndpointPolicy); } + + + + + template + void RunOnArena( + TRequest* request, + TResponseCb&& userResponseCb, + TSimpleRpc rpc, + TDbDriverStatePtr dbState, + const TRpcRequestSettings& requestSettings, + std::shared_ptr context = nullptr) + { + using NYdbGrpc::TGrpcStatus; + using TConnection = std::unique_ptr>; + Y_ABORT_UNLESS(dbState); + + if (!TryCreateContext(context)) { + TPlainStatus status(EStatus::CLIENT_CANCELLED, "Client is stopped"); + userResponseCb(nullptr, TPlainStatus{status.Status, std::move(status.Issues)}); + return; + } + + if (dbState->StatCollector.IsCollecting()) { + std::weak_ptr weakState = dbState; + const auto startTime = TInstant::Now(); + userResponseCb = std::move([cb = std::move(userResponseCb), weakState, startTime](TResponse* response, TPlainStatus status) { + const auto resultSize = response ? response->ByteSizeLong() : 0; + cb(response, status); + + if (auto state = weakState.lock()) { + state->StatCollector.IncRequestLatency(TInstant::Now() - startTime); + state->StatCollector.IncResultSize(resultSize); + } + }); + } + + WithServiceConnection( + [this, request, userResponseCb = std::move(userResponseCb), rpc, requestSettings, context = std::move(context), dbState] + (TPlainStatus status, TConnection serviceConnection, TEndpointKey endpoint) mutable -> void { + if (!status.Ok()) { + userResponseCb( + nullptr, + std::move(status)); + return; + } + + TCallMeta meta; + meta.Timeout = requestSettings.ClientTimeout; + #ifndef YDB_GRPC_UNSECURE_AUTH + meta.CallCredentials = dbState->CallCredentials; + #else + if (requestSettings.UseAuth && dbState->CredentialsProvider && dbState->CredentialsProvider->IsValid()) { + try { + meta.Aux.push_back({ YDB_AUTH_TICKET_HEADER, GetAuthInfo(dbState) }); + } catch (const std::exception& e) { + userResponseCb( + nullptr, + TPlainStatus( + EStatus::CLIENT_UNAUTHENTICATED, + TStringBuilder() << "Can't get Authentication info from CredentialsProvider. " << e.what() + ) + ); + return; + } + } + #endif + if (!requestSettings.TraceId.empty()) { + meta.Aux.push_back({YDB_TRACE_ID_HEADER, requestSettings.TraceId}); + } + + if (!requestSettings.RequestType.empty()) { + meta.Aux.push_back({YDB_REQUEST_TYPE_HEADER, requestSettings.RequestType}); + } + + if (!dbState->Database.empty()) { + SetDatabaseHeader(meta, dbState->Database); + } + + static const std::string clientPid = GetClientPIDHeaderValue(); + + meta.Aux.push_back({YDB_SDK_BUILD_INFO_HEADER, CreateSDKBuildInfo()}); + meta.Aux.push_back({YDB_CLIENT_PID, clientPid}); + meta.Aux.insert(meta.Aux.end(), requestSettings.Header.begin(), requestSettings.Header.end()); + + dbState->StatCollector.IncGRpcInFlight(); + dbState->StatCollector.IncGRpcInFlightByHost(endpoint.GetEndpoint()); + + NYdbGrpc::TAdvancedResponseCallback responseCbLow = + [this, context, userResponseCb = std::move(userResponseCb), endpoint, dbState] + (const grpc::ClientContext& ctx, TGrpcStatus&& grpcStatus, TResponse&& response) mutable -> void { + dbState->StatCollector.DecGRpcInFlight(); + dbState->StatCollector.DecGRpcInFlightByHost(endpoint.GetEndpoint()); + + if (NYdbGrpc::IsGRpcStatusGood(grpcStatus)) { + std::multimap metadata; + + for (const auto& [name, value] : ctx.GetServerInitialMetadata()) { + metadata.emplace( + std::string(name.begin(), name.end()), + std::string(value.begin(), value.end())); + } + for (const auto& [name, value] : ctx.GetServerTrailingMetadata()) { + metadata.emplace( + std::string(name.begin(), name.end()), + std::string(value.begin(), value.end())); + } + + auto resp = new TResult( + std::move(response), + std::move(grpcStatus), + std::move(userResponseCb), + this, + std::move(context), + endpoint.GetEndpoint(), + std::move(metadata)); + + EnqueueResponse(resp); + } else { + dbState->StatCollector.IncReqFailDueTransportError(); + dbState->StatCollector.IncTransportErrorsByHost(endpoint.GetEndpoint()); + + auto resp = new TGRpcErrorResponse( + std::move(grpcStatus), + std::move(userResponseCb), + this, + std::move(context), + endpoint.GetEndpoint()); + + dbState->EndpointPool.BanEndpoint(endpoint.GetEndpoint()); + + EnqueueResponse(resp); + } + }; + + serviceConnection->DoAdvancedRequest(*request, std::move(responseCbLow), rpc, meta, + context.get()); + }, dbState, requestSettings.PreferredEndpoint, requestSettings.EndpointPolicy); + } + + + + + template void RunDeferred( TRequest&& request, @@ -321,7 +465,62 @@ class TGRpcConnectionsImpl }; Run( - std::move(request), + std::forward(request), + responseCb, + rpc, + dbState, + requestSettings, + std::move(context)); + } + + + + template + void RunDeferredOnArena( + TRequest* request, + TDeferredOperationCb&& userResponseCb, + TSimpleRpc rpc, + TDbDriverStatePtr dbState, + TDuration deferredTimeout, + const TRpcRequestSettings& requestSettings, + bool poll = false, + std::shared_ptr context = nullptr) + { + if (!TryCreateContext(context)) { + TPlainStatus status(EStatus::CLIENT_CANCELLED, "Client is stopped"); + userResponseCb(nullptr, status); + return; + } + + auto responseCb = [this, userResponseCb = std::move(userResponseCb), dbState, deferredTimeout, poll, context] + (TResponse* response, TPlainStatus status) mutable + { + if (response) { + Ydb::Operations::Operation* operation = response->mutable_operation(); + if (!operation->ready() && poll) { + auto action = MakeIntrusive( + operation->id(), + std::move(userResponseCb), + this, + std::move(context), + deferredTimeout, + dbState, + status.Endpoint); + + action->Start(); + } else { + NYdb::NIssue::TIssues opIssues; + NYdb::NIssue::IssuesFromMessage(operation->issues(), opIssues); + userResponseCb(operation, TPlainStatus{static_cast(operation->status()), std::move(opIssues), + status.Endpoint, std::move(status.Metadata)}); + } + } else { + userResponseCb(nullptr, status); + } + }; + + RunOnArena( + request, responseCb, rpc, dbState, @@ -329,6 +528,9 @@ class TGRpcConnectionsImpl std::move(context)); } + + + // Run request using discovery endpoint. // Mostly usefull to make calls from credential provider template @@ -375,7 +577,7 @@ class TGRpcConnectionsImpl }; RunDeferred( - std::move(request), + std::forward(request), operationCb, rpc, dbState, @@ -385,6 +587,42 @@ class TGRpcConnectionsImpl context); } + + + +template + void RunDeferredOnArena( + TRequest* request, + TDeferredResultCb&& userResponseCb, + TSimpleRpc rpc, + TDbDriverStatePtr dbState, + TDuration deferredTimeout, + const TRpcRequestSettings& requestSettings, + std::shared_ptr context = nullptr) + { + auto operationCb = [userResponseCb = std::move(userResponseCb)](Ydb::Operations::Operation* operation, TPlainStatus status) mutable { + if (operation) { + status.SetCostInfo(std::move(*operation->mutable_cost_info())); + userResponseCb(operation->mutable_result(), std::move(status)); + } else { + userResponseCb(nullptr, std::move(status)); + } + }; + + RunDeferredOnArena( + request, + operationCb, + rpc, + dbState, + deferredTimeout, + requestSettings, + true, // poll + context); + } + + + + template class TStream> using TStreamRpc = typename TStream< diff --git a/ydb/public/sdk/cpp/src/client/impl/ydb_internal/make_request/make.h b/ydb/public/sdk/cpp/src/client/impl/ydb_internal/make_request/make.h index d8dd35dbe6b7..392fd35cf793 100644 --- a/ydb/public/sdk/cpp/src/client/impl/ydb_internal/make_request/make.h +++ b/ydb/public/sdk/cpp/src/client/impl/ydb_internal/make_request/make.h @@ -46,4 +46,18 @@ TProtoRequest MakeOperationRequest(const TRequestSettings& settings) { return request; } + +template +TProtoRequest* MakeRequestOnArena(google::protobuf::Arena* arena) { + assert(arena != nullptr); + return google::protobuf::Arena::CreateMessage(arena); +} + +template +TProtoRequest* MakeOperationRequestOnArena(const TRequestSettings& settings, google::protobuf::Arena* arena) { + auto request = MakeRequestOnArena(arena); + FillOperationParams(settings, *request); + return request; +} + } // namespace NYdb diff --git a/ydb/public/sdk/cpp/src/client/table/impl/table_client.cpp b/ydb/public/sdk/cpp/src/client/table/impl/table_client.cpp index bcc955c299d2..3e3168f368a9 100644 --- a/ydb/public/sdk/cpp/src/client/table/impl/table_client.cpp +++ b/ydb/public/sdk/cpp/src/client/table/impl/table_client.cpp @@ -1016,6 +1016,49 @@ TAsyncBulkUpsertResult TTableClient::TImpl::BulkUpsertUnretryable(const std::str } +// TODO: std::pair -> ArenaAllocatedTValue +TAsyncBulkUpsertResult TTableClient::TImpl::BulkUpsertUnretryableArenaAllocated( + const std::string& table, + std::pair&& rows, + google::protobuf::Arena* arena, + const TBulkUpsertSettings& settings +) { + auto request = MakeOperationRequestOnArena(settings, arena); + // std::cout << "request->GetArena(): " << request->GetArena() << "\n" + // << "request->mutable_rows()->GetArena(): " << request->mutable_rows()->GetArena() << "\n" + // << "Value->GetArena(): " << rows.second->GetArena() << "\n" + // << "given arena: " << arena << std::endl; + // assert(request->GetArena() == arena); + // assert(request->mutable_rows()->GetArena() == arena); + request->set_table(TStringType{table}); + *request->mutable_rows()->mutable_type() = std::move(rows.first).ExtractProto(); + *request->mutable_rows()->mutable_value() = std::move(*rows.second); + + auto promise = NewPromise(); + + auto extractor = [promise] + (google::protobuf::Any* any, TPlainStatus status) mutable { + Y_UNUSED(any); + TBulkUpsertResult val(TStatus(std::move(status))); + promise.SetValue(std::move(val)); + }; + + // TODO: make sure it is correct; see what server receives + // don't give ownership of request to the function + Connections_->RunDeferredOnArena( + request, + extractor, + &Ydb::Table::V1::TableService::Stub::AsyncBulkUpsert, + DbDriverState_, + INITIAL_DEFERRED_CALL_DELAY, + TRpcRequestSettings::Make(settings)); + + return promise.GetFuture(); +} + + + + TAsyncBulkUpsertResult TTableClient::TImpl::BulkUpsert(const std::string& table, TValue&& rows, const TBulkUpsertSettings& settings) { auto request = MakeOperationRequest(settings); request.set_table(TStringType{table}); diff --git a/ydb/public/sdk/cpp/src/client/table/impl/table_client.h b/ydb/public/sdk/cpp/src/client/table/impl/table_client.h index 49abec6152cb..436d7c68d480 100644 --- a/ydb/public/sdk/cpp/src/client/table/impl/table_client.h +++ b/ydb/public/sdk/cpp/src/client/table/impl/table_client.h @@ -143,6 +143,14 @@ class TTableClient::TImpl: public TClientImplCommon, public * therefore, it is not retryable with the same `rows` object. */ TAsyncBulkUpsertResult BulkUpsertUnretryable(const std::string& table, TValue&& rows, const TBulkUpsertSettings& settings); + + TAsyncBulkUpsertResult BulkUpsertUnretryableArenaAllocated( + const std::string& table, + std::pair&& rows, + google::protobuf::Arena* arena, + const TBulkUpsertSettings& settings + ); + TAsyncBulkUpsertResult BulkUpsert(const std::string& table, TValue&& rows, const TBulkUpsertSettings& settings); TAsyncBulkUpsertResult BulkUpsert(const std::string& table, EDataFormat format, const std::string& data, const std::string& schema, const TBulkUpsertSettings& settings); diff --git a/ydb/public/sdk/cpp/src/client/table/table.cpp b/ydb/public/sdk/cpp/src/client/table/table.cpp index 52d732535f0f..69373930f9f0 100644 --- a/ydb/public/sdk/cpp/src/client/table/table.cpp +++ b/ydb/public/sdk/cpp/src/client/table/table.cpp @@ -1463,6 +1463,14 @@ TAsyncBulkUpsertResult TTableClient::BulkUpsertUnretryable(const std::string& ta return Impl_->BulkUpsertUnretryable(table, std::move(rows), settings); } +TAsyncBulkUpsertResult TTableClient::BulkUpsertUnretryableArenaAllocated(const std::string& table, + std::pair&& rows, + google::protobuf::Arena* arena, + const TBulkUpsertSettings& settings) +{ + return Impl_->BulkUpsertUnretryableArenaAllocated(table, std::move(rows), arena, settings); +} + TAsyncBulkUpsertResult TTableClient::BulkUpsert(const std::string& table, TValue&& rows, const TBulkUpsertSettings& settings) { From 127dd6546b45dbaad108ecbcbfa9568e38d98db5 Mon Sep 17 00:00:00 2001 From: Vladislav Artiukhov Date: Thu, 1 May 2025 20:19:23 +0000 Subject: [PATCH 10/35] fix: remmove nullptr param from BuildList call in tests --- ydb/public/lib/ydb_cli/common/csv_parser_ut.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ydb/public/lib/ydb_cli/common/csv_parser_ut.cpp b/ydb/public/lib/ydb_cli/common/csv_parser_ut.cpp index 2b6e8b60b8a7..b84553a6b531 100644 --- a/ydb/public/lib/ydb_cli/common/csv_parser_ut.cpp +++ b/ydb/public/lib/ydb_cli/common/csv_parser_ut.cpp @@ -54,7 +54,7 @@ Y_UNIT_TEST_SUITE(YdbCliCsvParserTests) { TCsvParser parser(std::move(header), ',', "", &columnTypes, nullptr); parser.BuildLineType(); - TValue builtResult = parser.BuildList(data, "testFile.csv", 0, nullptr); + TValue builtResult = parser.BuildList(data, "testFile.csv", 0); AssertValuesEqual(builtResult, estimatedResult); } @@ -325,7 +325,7 @@ Y_UNIT_TEST_SUITE(YdbCliCsvParserTests) { TCsvParser parser(std::move(csvHeader), ',', "", &tableColumnTypes, nullptr); parser.BuildLineType(); - TValue builtResult = parser.BuildList(data, "testFile.csv", 0, nullptr); + TValue builtResult = parser.BuildList(data, "testFile.csv", 0); TValue expexctedResult = TValueBuilder().BeginList() .AddListItem().BeginStruct() From 07137adabf821e69700b91cd1b623ba1247ea3e3 Mon Sep 17 00:00:00 2001 From: Vladislav Artiukhov Date: Thu, 1 May 2025 20:21:44 +0000 Subject: [PATCH 11/35] refactor: remove explicit default parameter from BuildList call --- ydb/public/lib/ydb_cli/import/import.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ydb/public/lib/ydb_cli/import/import.cpp b/ydb/public/lib/ydb_cli/import/import.cpp index 9a432c4356ca..d6de63e852cf 100644 --- a/ydb/public/lib/ydb_cli/import/import.cpp +++ b/ydb/public/lib/ydb_cli/import/import.cpp @@ -1304,7 +1304,7 @@ TStatus TImportFileClient::TImpl::UpsertCsvByBlocks(const TString& filePath, auto upsertCsvFunc = [&](std::vector&& buffer) { auto buildFunc = [&jobsInflight, &parser, buffer = std::move(buffer), &filePath, this]() mutable { try { - return parser.BuildList(buffer, filePath, std::nullopt); + return parser.BuildList(buffer, filePath); } catch (const std::exception& e) { if (!Failed.exchange(true)) { ErrorStatus = MakeHolder(MakeStatus(EStatus::INTERNAL_ERROR, e.what())); From 65dc7e1b8bde6adefc34005b60a7d6245276aec6 Mon Sep 17 00:00:00 2001 From: Vladislav Artiukhov Date: Thu, 1 May 2025 20:44:45 +0000 Subject: [PATCH 12/35] feat: add several TODOs --- ydb/public/lib/ydb_cli/common/csv_parser.cpp | 2 ++ ydb/public/lib/ydb_cli/import/import.cpp | 1 + 2 files changed, 3 insertions(+) diff --git a/ydb/public/lib/ydb_cli/common/csv_parser.cpp b/ydb/public/lib/ydb_cli/common/csv_parser.cpp index 4ebbd42de08b..d2467137cd65 100644 --- a/ydb/public/lib/ydb_cli/common/csv_parser.cpp +++ b/ydb/public/lib/ydb_cli/common/csv_parser.cpp @@ -640,6 +640,8 @@ void TCsvParser::ProcessCsvLine( } TStringBuf nextField = Consume(splitter, meta, *headerIt); if (!*skipIt) { + // TODO: create Ydb::Value on the arena to avoid copying here + // TODO: (despite std::move, we copy from non-arena allocated Ydb::Value into arena-allocated structItems) TValue builtValue = FieldToValue(*typeParserIt->get(), nextField, NullValue, meta, *headerIt); *structItems->Add() = std::move(builtValue).ExtractProto(); ++typeParserIt; diff --git a/ydb/public/lib/ydb_cli/import/import.cpp b/ydb/public/lib/ydb_cli/import/import.cpp index d6de63e852cf..09fd2873fda5 100644 --- a/ydb/public/lib/ydb_cli/import/import.cpp +++ b/ydb/public/lib/ydb_cli/import/import.cpp @@ -1158,6 +1158,7 @@ TStatus TImportFileClient::TImpl::UpsertCsv(IInputStream& input, } }; + // TODO: create arena here and pass it to UpsertTValueBufferOnArena? // UpsertTValueBuffer(dbPath, std::move(buildFunc)) UpsertTValueBufferOnArena(dbPath, std::move(buildOnArenaFunc)) .Apply([&, batchStatus](const TAsyncStatus& asyncStatus) { From c16423fb4351fec93651640d71b91f4caa7681d0 Mon Sep 17 00:00:00 2001 From: Vladislav Artiukhov Date: Fri, 2 May 2025 14:30:12 +0000 Subject: [PATCH 13/35] feat: name file-processing threads in their thread pool --- ydb/public/lib/ydb_cli/import/import.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/ydb/public/lib/ydb_cli/import/import.cpp b/ydb/public/lib/ydb_cli/import/import.cpp index 09fd2873fda5..7b8c8f8ebe11 100644 --- a/ydb/public/lib/ydb_cli/import/import.cpp +++ b/ydb/public/lib/ydb_cli/import/import.cpp @@ -689,7 +689,7 @@ TStatus TImportFileClient::TImpl::Import(const TVector& filePaths, cons auto start = TInstant::Now(); - TThreadPool jobPool; + TThreadPool jobPool(IThreadPool::TParams().SetThreadNamePrefix("FileWorker")); jobPool.Start(filePathsSize); TVector> asyncResults; @@ -709,6 +709,7 @@ TStatus TImportFileClient::TImpl::Import(const TVector& filePaths, cons for (size_t fileOrderNumber = 0; fileOrderNumber < filePathsSize; ++fileOrderNumber) { const auto& filePath = filePaths[fileOrderNumber]; std::shared_ptr progressFile = LoadOrStartImportProgress(filePath); + auto func = [&, fileOrderNumber, progressFile, this] { std::unique_ptr fileInput; std::optional fileSizeHint; From cf459a686bbe42fe0e6ac206266986908e2df151 Mon Sep 17 00:00:00 2001 From: Vladislav Artiukhov Date: Fri, 2 May 2025 14:30:38 +0000 Subject: [PATCH 14/35] feat: add TODO about arena-based TValueBuilderImpl --- ydb/public/sdk/cpp/src/client/value/value.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/ydb/public/sdk/cpp/src/client/value/value.cpp b/ydb/public/sdk/cpp/src/client/value/value.cpp index 17f22a43d755..2105ccc8a957 100644 --- a/ydb/public/sdk/cpp/src/client/value/value.cpp +++ b/ydb/public/sdk/cpp/src/client/value/value.cpp @@ -2024,6 +2024,7 @@ void TValueParser::CloseTagged() { //////////////////////////////////////////////////////////////////////////////// +// TODO: if TValueArenaBuilderImpl implemented, we can avoid copying Ydb::Value in TCsvParser::BuildListOnArena when FieldToValue is called class TValueBuilderImpl { using ETypeKind = TTypeParser::ETypeKind; using TMembersMap = std::map; From 9b44f1fe03f99e4e310841092de594863924baf1 Mon Sep 17 00:00:00 2001 From: Vladislav Artiukhov Date: Fri, 2 May 2025 15:34:29 +0000 Subject: [PATCH 15/35] refactor: replace pair of TType and Ydb::Value* with type TArenaAllocatedValue --- ydb/public/lib/ydb_cli/common/csv_parser.cpp | 7 ++-- ydb/public/lib/ydb_cli/common/csv_parser.h | 2 +- ydb/public/lib/ydb_cli/import/import.cpp | 10 ++--- .../include/ydb-cpp-sdk/client/table/table.h | 2 +- .../include/ydb-cpp-sdk/client/value/value.h | 18 ++++++++ .../src/client/table/impl/table_client.cpp | 8 ++-- .../cpp/src/client/table/impl/table_client.h | 2 +- ydb/public/sdk/cpp/src/client/table/table.cpp | 2 +- ydb/public/sdk/cpp/src/client/value/value.cpp | 42 +++++++++++++++++++ 9 files changed, 75 insertions(+), 18 deletions(-) diff --git a/ydb/public/lib/ydb_cli/common/csv_parser.cpp b/ydb/public/lib/ydb_cli/common/csv_parser.cpp index d2467137cd65..604ab8c0a408 100644 --- a/ydb/public/lib/ydb_cli/common/csv_parser.cpp +++ b/ydb/public/lib/ydb_cli/common/csv_parser.cpp @@ -583,14 +583,13 @@ TValue TCsvParser::BuildList(const std::vector& lines, const TString& f -// TODO: std::pair -> ArenaAllocatedTValue -std::pair TCsvParser::BuildListOnArena( +TArenaAllocatedValue TCsvParser::BuildListOnArena( const std::vector& lines, const TString& filename, google::protobuf::Arena* arena, std::optional row ) const { - assert(arena != nullptr); + Y_ASSERT(arena != nullptr); std::vector> columnTypeParsers; columnTypeParsers.reserve(ResultColumnCount); @@ -611,7 +610,7 @@ std::pair TCsvParser::BuildListOnArena( } // Return a TValue that references the arena-allocated message - return std::make_pair(ResultListType.value(), value); + return TArenaAllocatedValue(ResultListType.value(), value); } diff --git a/ydb/public/lib/ydb_cli/common/csv_parser.h b/ydb/public/lib/ydb_cli/common/csv_parser.h index 69194ba366f3..1a1330de904b 100644 --- a/ydb/public/lib/ydb_cli/common/csv_parser.h +++ b/ydb/public/lib/ydb_cli/common/csv_parser.h @@ -72,7 +72,7 @@ class TCsvParser { TValue BuildList(const std::vector& lines, const TString& filename, std::optional row = std::nullopt) const; - std::pair BuildListOnArena( + TArenaAllocatedValue BuildListOnArena( const std::vector& lines, const TString& filename, google::protobuf::Arena* arena, diff --git a/ydb/public/lib/ydb_cli/import/import.cpp b/ydb/public/lib/ydb_cli/import/import.cpp index 7b8c8f8ebe11..518114f94461 100644 --- a/ydb/public/lib/ydb_cli/import/import.cpp +++ b/ydb/public/lib/ydb_cli/import/import.cpp @@ -562,7 +562,7 @@ class TImportFileClient::TImpl { TAsyncStatus UpsertTValueBuffer(const TString& dbPath, std::function&& buildFunc); TAsyncStatus UpsertTValueBufferOnArena( - const TString& dbPath, std::function(google::protobuf::Arena*)>&& buildFunc); + const TString& dbPath, std::function&& buildFunc); TStatus UpsertJson(IInputStream &input, const TString &dbPath, std::optional inputSizeHint, ProgressCallbackFunc & progressCallback); @@ -1016,15 +1016,13 @@ TAsyncStatus TImportFileClient::TImpl::UpsertTValueBuffer(const TString& dbPath, -// TODO: std::pair -> ArenaAllocatedTValue inline TAsyncStatus TImportFileClient::TImpl::UpsertTValueBufferOnArena( - const TString& dbPath, std::function(google::protobuf::Arena*)>&& buildFunc) { - using YdbValuePair = std::pair; + const TString& dbPath, std::function&& buildFunc) { auto arena = std::make_shared(); // For the first attempt values are built before acquiring request inflight semaphore - std::optional prebuiltValue = buildFunc(arena.get()); + std::optional prebuiltValue = buildFunc(arena.get()); auto retryFunc = [this, &dbPath, buildFunc = std::move(buildFunc), prebuiltValue = std::move(prebuiltValue), arena = std::move(arena)] @@ -1032,7 +1030,7 @@ TAsyncStatus TImportFileClient::TImpl::UpsertTValueBufferOnArena( auto buildTValueAndSendRequest = [this, &buildFunc, &dbPath, &tableClient, &prebuiltValue, arena]() { // For every retry attempt after first request build value from strings again // to prevent copying data in retryFunc in a happy way when there is only one request - YdbValuePair builtValue = prebuiltValue.has_value() ? std::move(prebuiltValue.value()) : buildFunc(arena.get()); + TArenaAllocatedValue builtValue = prebuiltValue.has_value() ? std::move(prebuiltValue.value()) : buildFunc(arena.get()); prebuiltValue = std::nullopt; return tableClient.BulkUpsertUnretryableArenaAllocated( dbPath, std::move(builtValue), arena.get(), UpsertSettings) diff --git a/ydb/public/sdk/cpp/include/ydb-cpp-sdk/client/table/table.h b/ydb/public/sdk/cpp/include/ydb-cpp-sdk/client/table/table.h index 65e84b17aed1..f2fdd5a64758 100644 --- a/ydb/public/sdk/cpp/include/ydb-cpp-sdk/client/table/table.h +++ b/ydb/public/sdk/cpp/include/ydb-cpp-sdk/client/table/table.h @@ -1269,7 +1269,7 @@ class TTableClient { TAsyncBulkUpsertResult BulkUpsertUnretryableArenaAllocated( const std::string& table, - std::pair&& rows, + TArenaAllocatedValue&& rows, google::protobuf::Arena* arena, const TBulkUpsertSettings& settings ); diff --git a/ydb/public/sdk/cpp/include/ydb-cpp-sdk/client/value/value.h b/ydb/public/sdk/cpp/include/ydb-cpp-sdk/client/value/value.h index bb74c8a5aa1a..3f1570aa8bd3 100644 --- a/ydb/public/sdk/cpp/include/ydb-cpp-sdk/client/value/value.h +++ b/ydb/public/sdk/cpp/include/ydb-cpp-sdk/client/value/value.h @@ -285,6 +285,24 @@ class TValue { std::shared_ptr Impl_; }; +class TArenaAllocatedValue { +public: + TArenaAllocatedValue(const TType& type, Ydb::Value* value); + + const TType& GetType() const; + TType& GetType(); + + const Ydb::Value* GetProto() const; + Ydb::Value* GetProto(); + + Ydb::Value&& ExtractProto() &&; + +private: + class TImpl; + std::shared_ptr Impl_; +}; + + class TValueParser : public TMoveOnly { friend class TResultSetParser; public: diff --git a/ydb/public/sdk/cpp/src/client/table/impl/table_client.cpp b/ydb/public/sdk/cpp/src/client/table/impl/table_client.cpp index 3e3168f368a9..ffeca4647695 100644 --- a/ydb/public/sdk/cpp/src/client/table/impl/table_client.cpp +++ b/ydb/public/sdk/cpp/src/client/table/impl/table_client.cpp @@ -1016,10 +1016,9 @@ TAsyncBulkUpsertResult TTableClient::TImpl::BulkUpsertUnretryable(const std::str } -// TODO: std::pair -> ArenaAllocatedTValue TAsyncBulkUpsertResult TTableClient::TImpl::BulkUpsertUnretryableArenaAllocated( const std::string& table, - std::pair&& rows, + TArenaAllocatedValue&& rows, google::protobuf::Arena* arena, const TBulkUpsertSettings& settings ) { @@ -1031,8 +1030,9 @@ TAsyncBulkUpsertResult TTableClient::TImpl::BulkUpsertUnretryableArenaAllocated( // assert(request->GetArena() == arena); // assert(request->mutable_rows()->GetArena() == arena); request->set_table(TStringType{table}); - *request->mutable_rows()->mutable_type() = std::move(rows.first).ExtractProto(); - *request->mutable_rows()->mutable_value() = std::move(*rows.second); + // TODO: Ydb::Type still gets copied because request is arena-allocated and rows' Type is not + *request->mutable_rows()->mutable_type() = std::move(rows.GetType()).ExtractProto(); + *request->mutable_rows()->mutable_value() = std::move(rows).ExtractProto(); auto promise = NewPromise(); diff --git a/ydb/public/sdk/cpp/src/client/table/impl/table_client.h b/ydb/public/sdk/cpp/src/client/table/impl/table_client.h index 436d7c68d480..5acbd4e3730f 100644 --- a/ydb/public/sdk/cpp/src/client/table/impl/table_client.h +++ b/ydb/public/sdk/cpp/src/client/table/impl/table_client.h @@ -146,7 +146,7 @@ class TTableClient::TImpl: public TClientImplCommon, public TAsyncBulkUpsertResult BulkUpsertUnretryableArenaAllocated( const std::string& table, - std::pair&& rows, + TArenaAllocatedValue&& rows, google::protobuf::Arena* arena, const TBulkUpsertSettings& settings ); diff --git a/ydb/public/sdk/cpp/src/client/table/table.cpp b/ydb/public/sdk/cpp/src/client/table/table.cpp index 69373930f9f0..a3aa90c16616 100644 --- a/ydb/public/sdk/cpp/src/client/table/table.cpp +++ b/ydb/public/sdk/cpp/src/client/table/table.cpp @@ -1464,7 +1464,7 @@ TAsyncBulkUpsertResult TTableClient::BulkUpsertUnretryable(const std::string& ta } TAsyncBulkUpsertResult TTableClient::BulkUpsertUnretryableArenaAllocated(const std::string& table, - std::pair&& rows, + TArenaAllocatedValue&& rows, google::protobuf::Arena* arena, const TBulkUpsertSettings& settings) { diff --git a/ydb/public/sdk/cpp/src/client/value/value.cpp b/ydb/public/sdk/cpp/src/client/value/value.cpp index 2105ccc8a957..adcb001622d9 100644 --- a/ydb/public/sdk/cpp/src/client/value/value.cpp +++ b/ydb/public/sdk/cpp/src/client/value/value.cpp @@ -1060,6 +1060,24 @@ class TValue::TImpl { Ydb::Value ProtoValue_; }; +/** +* Lifetime of the arena, and hence the `Ydb::Value`, is expected to be managed by the caller. +*/ +class TArenaAllocatedValue::TImpl { +public: + TImpl(const TType& type, Ydb::Value* value) + : Type_(type) + , ArenaAllocatedProtoValue_(value) { + // assert that the value is indeed arena-allocated + Y_ASSERT(ArenaAllocatedProtoValue_ != nullptr); + Y_ASSERT(ArenaAllocatedProtoValue_->GetArena() != nullptr); + } + + // TODO: make Ydb::Type also arena-allocated? + TType Type_; + Ydb::Value* ArenaAllocatedProtoValue_; +}; + //////////////////////////////////////////////////////////////////////////////// TValue::TValue(const TType& type, const Ydb::Value& valueProto) @@ -1088,6 +1106,30 @@ Ydb::Value&& TValue::ExtractProto() && { return std::move(Impl_->ProtoValue_); } + +TArenaAllocatedValue::TArenaAllocatedValue(const TType& type, Ydb::Value* value) + : Impl_(new TImpl(type, value)) {} + +const TType& TArenaAllocatedValue::GetType() const { + return Impl_->Type_; +} + +TType& TArenaAllocatedValue::GetType() { + return Impl_->Type_; +} + +const Ydb::Value* TArenaAllocatedValue::GetProto() const { + return Impl_->ArenaAllocatedProtoValue_; +} + +Ydb::Value* TArenaAllocatedValue::GetProto() { + return Impl_->ArenaAllocatedProtoValue_; +} + +Ydb::Value&& TArenaAllocatedValue::ExtractProto() && { + return std::move(*Impl_->ArenaAllocatedProtoValue_); +} + //////////////////////////////////////////////////////////////////////////////// class TValueParser::TImpl { From 666932d9d47fec091ef03f9e7e06dac993f9dc5e Mon Sep 17 00:00:00 2001 From: Vladislav Artiukhov Date: Thu, 8 May 2025 17:04:48 +0000 Subject: [PATCH 16/35] feat: build Ydb::Value on the arena in TCsvParser::BuildListOnArena Notes: 1. Creates custom converter in `csv_parser.cpp` to build `Ydb::Value` on the arena. 2. Creates new implementer of `TValueBuilderBase` to build `TArenaAllocatedValue` in `value.cpp`/`value.h`. 3. Introduces `TValueHolder` interface with two implementations: - `StackAllocatedValueHolder` with stack-allocated `Ydb::Value`. - `TArenaAllocatedValueHolder` with arena-allocated `Ydb::Value`. Current drawbacks: - `TValueHolder` has virtual calls, which is costy; try to employ static polymorphism. - `FieldToValueOnArena` creates `TCsvToYdbOnArenaConverter`, which is a copy of `TCsvToYdbConverter`. Verdict: didn't produce any performance improvements (likely due to virtual calls in `TValueHolder`). --- ydb/public/lib/ydb_cli/common/csv_parser.cpp | 464 +++++++++++++++++- ydb/public/lib/ydb_cli/common/csv_parser.h | 4 +- .../include/ydb-cpp-sdk/client/value/fwd.h | 1 + .../include/ydb-cpp-sdk/client/value/value.h | 10 + ydb/public/sdk/cpp/src/client/value/value.cpp | 97 +++- 5 files changed, 558 insertions(+), 18 deletions(-) diff --git a/ydb/public/lib/ydb_cli/common/csv_parser.cpp b/ydb/public/lib/ydb_cli/common/csv_parser.cpp index 604ab8c0a408..cfcd3db1110a 100644 --- a/ydb/public/lib/ydb_cli/common/csv_parser.cpp +++ b/ydb/public/lib/ydb_cli/common/csv_parser.cpp @@ -15,9 +15,7 @@ class TCsvToYdbConverter { public: explicit TCsvToYdbConverter(TTypeParser& parser, const std::optional& nullValue) : Parser(parser) - , NullValue(nullValue) - { - } + , NullValue(nullValue) {} template && std::is_signed_v, std::nullptr_t> = nullptr> static i64 StringToArithmetic(const TString& token, size_t& cnt) { @@ -416,6 +414,415 @@ class TCsvToYdbConverter { TValueBuilder Builder; }; + + +// TODO: merge with the impl above +class TCsvToYdbOnArenaConverter { +public: + explicit TCsvToYdbOnArenaConverter(TTypeParser& parser, const std::optional& nullValue, google::protobuf::Arena* arena) + : Parser(parser) + , NullValue(nullValue) + , Builder(arena) {} + + template && std::is_signed_v, std::nullptr_t> = nullptr> + static i64 StringToArithmetic(const TString& token, size_t& cnt) { + return std::stoll(token, &cnt); + } + + template && std::is_unsigned_v, std::nullptr_t> = nullptr> + static ui64 StringToArithmetic(const TString& token, size_t& cnt) { + return std::stoull(token, &cnt); + } + + template , std::nullptr_t> = nullptr> + static float StringToArithmetic(const TString& token, size_t& cnt) { + return std::stof(token, &cnt); + } + + template , std::nullptr_t> = nullptr> + static double StringToArithmetic(const TString& token, size_t& cnt) { + return std::stod(token, &cnt); + } + + template + T GetArithmetic(const TString& token) const { + size_t cnt; + try { + auto value = StringToArithmetic(token, cnt); + if (cnt != token.size() || value < std::numeric_limits::lowest() || value > std::numeric_limits::max()) { + throw yexception(); + } + return static_cast(value); + } catch (std::exception& e) { + throw TCsvParseException() << "Expected " << Parser.GetPrimitive() << " value, received: \"" << token << "\"."; + } + } + + void BuildPrimitive(const TString& token) { + switch (Parser.GetPrimitive()) { + case EPrimitiveType::Int8: + Builder.Int8(GetArithmetic(token)); + break; + case EPrimitiveType::Int16: + Builder.Int16(GetArithmetic(token)); + break; + case EPrimitiveType::Int32: + Builder.Int32(GetArithmetic(token)); + break; + case EPrimitiveType::Int64: + Builder.Int64(GetArithmetic(token)); + break; + case EPrimitiveType::Uint8: + Builder.Uint8(GetArithmetic(token)); + break; + case EPrimitiveType::Uint16: + Builder.Uint16(GetArithmetic(token)); + break; + case EPrimitiveType::Uint32: + Builder.Uint32(GetArithmetic(token)); + break; + case EPrimitiveType::Uint64: + Builder.Uint64(GetArithmetic(token)); + break; + case EPrimitiveType::Bool: + Builder.Bool(GetBool(token)); + break; + case EPrimitiveType::String: + Builder.String(token); + break; + case EPrimitiveType::Utf8: + Builder.Utf8(token); + break; + case EPrimitiveType::Json: + Builder.Json(token); + break; + case EPrimitiveType::JsonDocument: + Builder.JsonDocument(token); + break; + case EPrimitiveType::Yson: + Builder.Yson(token); + break; + case EPrimitiveType::Uuid: + Builder.Uuid(TUuidValue{token}); + break; + case EPrimitiveType::Float: + Builder.Float(GetArithmetic(token)); + break; + case EPrimitiveType::Double: + Builder.Double(GetArithmetic(token)); + break; + case EPrimitiveType::DyNumber: + Builder.DyNumber(token); + break; + case EPrimitiveType::Date: { + TInstant date; + if (!TInstant::TryParseIso8601(token, date)) { + date = TInstant::Days(GetArithmetic(token)); + } + Builder.Date(date); + break; + } + case EPrimitiveType::Datetime: { + TInstant datetime; + if (!TInstant::TryParseIso8601(token, datetime)) { + datetime = TInstant::Seconds(GetArithmetic(token)); + } + Builder.Datetime(datetime); + break; + } + case EPrimitiveType::Timestamp: { + TInstant timestamp; + if (!TInstant::TryParseIso8601(token, timestamp)) { + timestamp = TInstant::MicroSeconds(GetArithmetic(token)); + } + Builder.Timestamp(timestamp); + break; + } + case EPrimitiveType::Interval: + Builder.Interval(GetArithmetic(token)); + break; + case EPrimitiveType::Date32: { + TInstant date; + if (TInstant::TryParseIso8601(token, date)) { + Builder.Date32(date.Days()); + } else { + Builder.Date32(GetArithmetic(token)); + } + break; + } + case EPrimitiveType::Datetime64: { + TInstant date; + if (TInstant::TryParseIso8601(token, date)) { + Builder.Datetime64(date.Seconds()); + } else { + Builder.Datetime64(GetArithmetic(token)); + } + break; + } + case EPrimitiveType::Timestamp64: { + TInstant date; + if (TInstant::TryParseIso8601(token, date)) { + Builder.Timestamp64(date.MicroSeconds()); + } else { + Builder.Timestamp64(GetArithmetic(token)); + } + break; + } + case EPrimitiveType::Interval64: + Builder.Interval64(GetArithmetic(token)); + break; + case EPrimitiveType::TzDate: + Builder.TzDate(token); + break; + case EPrimitiveType::TzDatetime: + Builder.TzDatetime(token); + break; + case EPrimitiveType::TzTimestamp: + Builder.TzTimestamp(token); + break; + default: + throw TCsvParseException() << "Unsupported primitive type: " << Parser.GetPrimitive(); + } + } + + void BuildValue(const TStringBuf& token) { + switch (Parser.GetKind()) { + case TTypeParser::ETypeKind::Primitive: { + BuildPrimitive(std::string{token}); + break; + } + case TTypeParser::ETypeKind::Decimal: { + Builder.Decimal(TDecimalValue(std::string(token), Parser.GetDecimal().Precision, Parser.GetDecimal().Scale)); + break; + } + case TTypeParser::ETypeKind::Optional: { + Parser.OpenOptional(); + if (NullValue && token == NullValue) { + Builder.EmptyOptional(GetType()); + } else { + Builder.BeginOptional(); + BuildValue(token); + Builder.EndOptional(); + } + Parser.CloseOptional(); + break; + } + case TTypeParser::ETypeKind::Null: { + EnsureNull(token); + break; + } + case TTypeParser::ETypeKind::Void: { + EnsureNull(token); + break; + } + case TTypeParser::ETypeKind::Tagged: { + Parser.OpenTagged(); + Builder.BeginTagged(Parser.GetTag()); + BuildValue(token); + Builder.EndTagged(); + Parser.CloseTagged(); + break; + } + case TTypeParser::ETypeKind::Pg: { + if (NullValue && token == NullValue) { + Builder.Pg(TPgValue(TPgValue::VK_NULL, {}, Parser.GetPg())); + } else { + Builder.Pg(TPgValue(TPgValue::VK_TEXT, TString(token), Parser.GetPg())); + } + break; + } + default: + throw TCsvParseException() << "Unsupported type kind: " << Parser.GetKind(); + } + } + + void BuildType(TTypeBuilder& typeBuilder) { + switch (Parser.GetKind()) { + case TTypeParser::ETypeKind::Primitive: + typeBuilder.Primitive(Parser.GetPrimitive()); + break; + + case TTypeParser::ETypeKind::Decimal: + typeBuilder.Decimal(Parser.GetDecimal()); + break; + + case TTypeParser::ETypeKind::Optional: + Parser.OpenOptional(); + typeBuilder.BeginOptional(); + BuildType(typeBuilder); + typeBuilder.EndOptional(); + Parser.CloseOptional(); + break; + + case TTypeParser::ETypeKind::Tagged: + Parser.OpenTagged(); + typeBuilder.BeginTagged(Parser.GetTag()); + BuildType(typeBuilder); + typeBuilder.EndTagged(); + Parser.CloseTagged(); + break; + + case TTypeParser::ETypeKind::Pg: + typeBuilder.Pg(Parser.GetPg()); + break; + + default: + throw TCsvParseException() << "Unsupported type kind: " << Parser.GetKind(); + } + } + + TType GetType() { + TTypeBuilder typeBuilder; + BuildType(typeBuilder); + return typeBuilder.Build(); + } + + bool GetBool(const TString& token) const { + if (token == "true") { + return true; + } + if (token == "false") { + return false; + } + throw TCsvParseException() << "Expected bool value: \"true\" or \"false\", received: \"" << token << "\"."; + } + + void EnsureNull(const TStringBuf& token) const { + if (!NullValue) { + throw TCsvParseException() << "Expected null value instead of \"" << token << "\", but null value is not set."; + } + if (token != NullValue) { + throw TCsvParseException() << "Expected null value: \"" << NullValue << "\", received: \"" << token << "\"."; + } + } + + template + bool TryParseArithmetic(const TString& token) const { + size_t cnt; + try { + auto value = StringToArithmetic(token, cnt); + if (cnt != token.size() || value < std::numeric_limits::lowest() || value > std::numeric_limits::max()) { + return false; + } + } catch (std::exception& e) { + return false; + } + return true; + } + + bool TryParseBool(const TString& token) const { + TString tokenLowerCase = to_lower(token); + return tokenLowerCase == "true" || tokenLowerCase == "false"; + } + + bool TryParsePrimitive(const TString& token) { + switch (Parser.GetPrimitive()) { + case EPrimitiveType::Uint8: + return TryParseArithmetic(token) && !token.StartsWith('-'); + case EPrimitiveType::Uint16: + return TryParseArithmetic(token) && !token.StartsWith('-');; + case EPrimitiveType::Uint32: + return TryParseArithmetic(token) && !token.StartsWith('-');; + case EPrimitiveType::Uint64: + return TryParseArithmetic(token) && !token.StartsWith('-');; + case EPrimitiveType::Int8: + return TryParseArithmetic(token); + case EPrimitiveType::Int16: + return TryParseArithmetic(token); + case EPrimitiveType::Int32: + return TryParseArithmetic(token); + case EPrimitiveType::Int64: + return TryParseArithmetic(token); + case EPrimitiveType::Bool: + return TryParseBool(token); + case EPrimitiveType::Json: + return token.StartsWith('{') && token.EndsWith('}'); + break; + case EPrimitiveType::JsonDocument: + break; + case EPrimitiveType::Yson: + break; + case EPrimitiveType::Uuid: + static std::regex uuidRegexTemplate("[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}"); + return std::regex_match(token.c_str(), uuidRegexTemplate); + case EPrimitiveType::Float: + return TryParseArithmetic(token); + case EPrimitiveType::Double: + return TryParseArithmetic(token); + case EPrimitiveType::DyNumber: + break; + case EPrimitiveType::Date: { + TInstant date; + return TInstant::TryParseIso8601(token, date) && token.length() <= 10; + } + case EPrimitiveType::Datetime: { + TInstant datetime; + return TInstant::TryParseIso8601(token, datetime) && token.length() <= 19; + } + case EPrimitiveType::Timestamp: { + TInstant timestamp; + return TInstant::TryParseIso8601(token, timestamp) || TryParseArithmetic(token); + } + case EPrimitiveType::Interval: + break; + case EPrimitiveType::Date32: { + TInstant date; + return TInstant::TryParseIso8601(token, date) || TryParseArithmetic(token); + } + case EPrimitiveType::Datetime64: { + TInstant date; + return TInstant::TryParseIso8601(token, date) || TryParseArithmetic(token); + } + case EPrimitiveType::Timestamp64: { + TInstant date; + return TInstant::TryParseIso8601(token, date) || TryParseArithmetic(token); + } + case EPrimitiveType::Interval64: + return TryParseArithmetic(token); + case EPrimitiveType::TzDate: + break; + case EPrimitiveType::TzDatetime: + break; + case EPrimitiveType::TzTimestamp: + break; + default: + throw TCsvParseException() << "Unsupported primitive type: " << Parser.GetPrimitive(); + } + return false; + } + + bool TryParseValue(const TStringBuf& token, TPossibleType& possibleType) { + if (NullValue && token == NullValue) { + possibleType.SetHasNulls(true); + return true; + } + possibleType.SetHasNonNulls(true); + switch (Parser.GetKind()) { + case TTypeParser::ETypeKind::Primitive: { + return TryParsePrimitive(TString(token)); + } + case TTypeParser::ETypeKind::Decimal: { + break; + } + default: + throw TCsvParseException() << "Unsupported type kind: " << Parser.GetKind(); + } + return false; + } + + TArenaAllocatedValue Convert(const TStringBuf& token) { + BuildValue(token); + return Builder.BuildArenaAllocatedValue(); + } + +private: + TTypeParser& Parser; + const std::optional NullValue = ""; + TArenaAllocatedValueBuilder Builder; +}; + + + TCsvParseException FormatError(const std::exception& inputError, const TCsvParser::TParseMetadata& meta, const std::optional& columnName = {}) { @@ -446,6 +853,23 @@ TValue FieldToValue(TTypeParser& parser, } } +TArenaAllocatedValue FieldToValueOnArena( + TTypeParser& parser, + const TStringBuf& token, + const std::optional& nullValue, + const TCsvParser::TParseMetadata& meta, + const std::string& columnName, + google::protobuf::Arena* arena +) { + Y_ASSERT(arena != nullptr); + try { + TCsvToYdbOnArenaConverter converter(parser, nullValue, arena); + return converter.Convert(token); + } catch (std::exception& e) { + throw FormatError(e, meta, columnName); + } +} + bool TryParse(TTypeParser& parser, const TStringBuf& token, const std::optional& nullValue, TPossibleType& possibleType) { try { TCsvToYdbConverter converter(parser, nullValue); @@ -571,7 +995,7 @@ TValue TCsvParser::BuildList(const std::vector& lines, const TString& f listItems->Reserve(lines.size()); for (const auto& line : lines) { - ProcessCsvLine(line, listItems, columnTypeParsers, row, filename); + ProcessCsvLine(line, listItems, columnTypeParsers, row, filename, /* arena = */ nullptr); if (row.has_value()) { ++row.value(); } @@ -603,7 +1027,7 @@ TArenaAllocatedValue TCsvParser::BuildListOnArena( listItems->Reserve(lines.size()); for (const auto& line : lines) { - ProcessCsvLine(line, listItems, columnTypeParsers, row, filename); + ProcessCsvLine(line, listItems, columnTypeParsers, row, filename, arena); if (row.has_value()) { ++row.value(); } @@ -613,7 +1037,7 @@ TArenaAllocatedValue TCsvParser::BuildListOnArena( return TArenaAllocatedValue(ResultListType.value(), value); } - +std::atomic_bool print = true; // Helper method to process a single CSV line void TCsvParser::ProcessCsvLine( @@ -621,8 +1045,9 @@ void TCsvParser::ProcessCsvLine( google::protobuf::RepeatedPtrField* listItems, const std::vector>& columnTypeParsers, std::optional currentRow, - const TString& filename) const { - + const TString& filename, + google::protobuf::Arena* arena +) const { NCsvFormat::CsvSplitter splitter(line, Delimeter); auto* structItems = listItems->Add()->mutable_items(); structItems->Reserve(ResultColumnCount); @@ -641,8 +1066,27 @@ void TCsvParser::ProcessCsvLine( if (!*skipIt) { // TODO: create Ydb::Value on the arena to avoid copying here // TODO: (despite std::move, we copy from non-arena allocated Ydb::Value into arena-allocated structItems) - TValue builtValue = FieldToValue(*typeParserIt->get(), nextField, NullValue, meta, *headerIt); - *structItems->Add() = std::move(builtValue).ExtractProto(); + if (arena != nullptr) { + TArenaAllocatedValue builtValue = FieldToValueOnArena( + *typeParserIt->get(), nextField, NullValue, meta, *headerIt, arena); + + // if (print.load()) { + // Cerr << "arena: " << static_cast(arena) << Endl; + // Cerr << "builtValue.GetProto()->GetArena(): " << static_cast(builtValue.GetProto()->GetArena()) << Endl; + // Cerr << "structItems->GetArena(): " << static_cast(structItems->GetArena()) << Endl; + // Cerr << "structItems->Add()->GetArena(): " << static_cast(structItems->Add()->GetArena()) << Endl; + // print.store(false); + // } + // Y_ASSERT(arena != nullptr); + // Y_ASSERT(builtValue.GetProto()->GetArena() != nullptr); + // Y_ASSERT(builtValue.GetProto()->GetArena() == structItems->GetArena()); + + *structItems->Add() = std::move(builtValue).ExtractProto(); + } else { + TValue builtValue = FieldToValue(*typeParserIt->get(), nextField, NullValue, meta, *headerIt); + *structItems->Add() = std::move(builtValue).ExtractProto(); + } + ++typeParserIt; } ++headerIt; diff --git a/ydb/public/lib/ydb_cli/common/csv_parser.h b/ydb/public/lib/ydb_cli/common/csv_parser.h index 1a1330de904b..d5feebfc1326 100644 --- a/ydb/public/lib/ydb_cli/common/csv_parser.h +++ b/ydb/public/lib/ydb_cli/common/csv_parser.h @@ -110,7 +110,9 @@ class TCsvParser { google::protobuf::RepeatedPtrField* listItems, const std::vector>& columnTypeParsers, std::optional currentRow, - const TString& filename) const; + const TString& filename, + google::protobuf::Arena* arena + ) const; }; } diff --git a/ydb/public/sdk/cpp/include/ydb-cpp-sdk/client/value/fwd.h b/ydb/public/sdk/cpp/include/ydb-cpp-sdk/client/value/fwd.h index 06eb71ac0703..9205080b7f7e 100644 --- a/ydb/public/sdk/cpp/include/ydb-cpp-sdk/client/value/fwd.h +++ b/ydb/public/sdk/cpp/include/ydb-cpp-sdk/client/value/fwd.h @@ -8,6 +8,7 @@ class TTypeBuilder; class TValue; class TValueParser; class TValueBuilder; +class TArenaAllocatedValueBuilder; template class TValueBuilderBase; diff --git a/ydb/public/sdk/cpp/include/ydb-cpp-sdk/client/value/value.h b/ydb/public/sdk/cpp/include/ydb-cpp-sdk/client/value/value.h index 3f1570aa8bd3..c4aa1792a55c 100644 --- a/ydb/public/sdk/cpp/include/ydb-cpp-sdk/client/value/value.h +++ b/ydb/public/sdk/cpp/include/ydb-cpp-sdk/client/value/value.h @@ -2,6 +2,7 @@ #include "fwd.h" +#include #include #include @@ -539,6 +540,8 @@ class TValueBuilderBase : public TMoveOnly { TValueBuilderBase(); + TValueBuilderBase(google::protobuf::Arena* arena); + TValueBuilderBase(const TType& type); TValueBuilderBase(Ydb::Type& type, Ydb::Value& value); @@ -560,6 +563,13 @@ class TValueBuilder : public TValueBuilderBase { TValue Build(); }; +class TArenaAllocatedValueBuilder : public TValueBuilderBase { +public: + TArenaAllocatedValueBuilder(google::protobuf::Arena* arena); + + TArenaAllocatedValue BuildArenaAllocatedValue(); +}; + } // namespace NYdb template<> diff --git a/ydb/public/sdk/cpp/src/client/value/value.cpp b/ydb/public/sdk/cpp/src/client/value/value.cpp index adcb001622d9..59409f9d0649 100644 --- a/ydb/public/sdk/cpp/src/client/value/value.cpp +++ b/ydb/public/sdk/cpp/src/client/value/value.cpp @@ -2066,6 +2066,58 @@ void TValueParser::CloseTagged() { //////////////////////////////////////////////////////////////////////////////// +class TValueHolder { +public: + virtual Ydb::Value& Value() = 0; + virtual Ydb::Value* ValuePtr() = 0; + virtual ~TValueHolder() = default; +}; + +class StackAllocatedValueHolder : public TValueHolder { +public: + Ydb::Value& Value() override { + return ProtoValue_; + } + + Ydb::Value* ValuePtr() override { + return &ProtoValue_; + } + +private: + Ydb::Value ProtoValue_; +}; + +class ArenaAllocatedValueHolder : public TValueHolder { +public: + ArenaAllocatedValueHolder(google::protobuf::Arena* arena) + // value is created lazily on first access + : ArenaAllocatedValue_(nullptr) + , Arena_(arena) { + Y_ASSERT(arena != nullptr); + } + + Ydb::Value& Value() override { + createValueIfAbsent(); + return *ArenaAllocatedValue_; + } + + Ydb::Value* ValuePtr() override { + createValueIfAbsent(); + return ArenaAllocatedValue_; + } + +private: + void createValueIfAbsent() { + if (ArenaAllocatedValue_ == nullptr) { + ArenaAllocatedValue_ = google::protobuf::Arena::CreateMessage(Arena_); + } + } + +private: + Ydb::Value* ArenaAllocatedValue_; + google::protobuf::Arena* Arena_; +}; + // TODO: if TValueArenaBuilderImpl implemented, we can avoid copying Ydb::Value in TCsvParser::BuildListOnArena when FieldToValue is called class TValueBuilderImpl { using ETypeKind = TTypeParser::ETypeKind; @@ -2092,19 +2144,29 @@ class TValueBuilderImpl { public: TValueBuilderImpl() : TypeBuilder_() + , ValueHolder_(std::make_unique()) { - PushPath(ProtoValue_); + PushPath(ValueHolder_->Value()); + } + + TValueBuilderImpl(google::protobuf::Arena* arena) + : TypeBuilder_() + , ValueHolder_(std::make_unique(arena)) + { + PushPath(ValueHolder_->Value()); } TValueBuilderImpl(const TType& type) : TypeBuilder_() + , ValueHolder_(std::make_unique()) { - PushPath(ProtoValue_); + PushPath(ValueHolder_->Value()); GetType().CopyFrom(type.GetProto()); } TValueBuilderImpl(Ydb::Type& type, Ydb::Value& value) : TypeBuilder_(type) + , ValueHolder_(std::make_unique()) { PushPath(value); } @@ -2116,15 +2178,20 @@ class TValueBuilderImpl { } } - TValue BuildValue() { + TValue BuildStackAllocatedValue() { CheckValue(); Ydb::Value value; - value.Swap(&ProtoValue_); + value.Swap(&ValueHolder_->Value()); return TValue(TypeBuilder_.Build(), std::move(value)); } + TArenaAllocatedValue BuildArenaAllocatedValue() { + CheckValue(); + return TArenaAllocatedValue(TypeBuilder_.Build(), ValueHolder_->ValuePtr()); + } + void Bool(bool value) { FillPrimitiveType(EPrimitiveType::Bool); GetValue().set_bool_value(value); @@ -2832,10 +2899,10 @@ class TValueBuilderImpl { } private: - //TTypeBuilder TypeBuilder_; TTypeBuilder::TImpl TypeBuilder_; - Ydb::Value ProtoValue_; + // Ydb::Value ProtoValue_; + std::unique_ptr ValueHolder_; std::map StructsMap_; TStackVec Path_; @@ -2854,6 +2921,12 @@ template TValueBuilderBase::TValueBuilderBase() : Impl_(new TValueBuilderImpl()) {} + +template +TValueBuilderBase::TValueBuilderBase(google::protobuf::Arena* arena) + : Impl_(new TValueBuilderImpl(arena)) {} + + template TValueBuilderBase::TValueBuilderBase(const TType& type) : Impl_(new TValueBuilderImpl(type)) {} @@ -3411,6 +3484,7 @@ TDerived& TValueBuilderBase::EndTagged() { template class TValueBuilderBase; template class TValueBuilderBase; +template class TValueBuilderBase; //////////////////////////////////////////////////////////////////////////////// @@ -3421,7 +3495,16 @@ TValueBuilder::TValueBuilder(const TType& type) : TValueBuilderBase(type) {} TValue TValueBuilder::Build() { - return Impl_->BuildValue(); + return Impl_->BuildStackAllocatedValue(); +} + +//////////////////////////////////////////////////////////////////////////////// + +TArenaAllocatedValueBuilder::TArenaAllocatedValueBuilder(google::protobuf::Arena* arena) + : TValueBuilderBase(arena) {} + +TArenaAllocatedValue TArenaAllocatedValueBuilder::BuildArenaAllocatedValue() { + return Impl_->BuildArenaAllocatedValue(); } } // namespace NYdb From 2ca666f062d6e21f4e108c521d7add2af2513555 Mon Sep 17 00:00:00 2001 From: Vladislav Artiukhov Date: Thu, 8 May 2025 19:20:22 +0000 Subject: [PATCH 17/35] feat: select `TValueHolder` in compile time when call `TCsvParser::BuildListOnArena` This commit fixes the issue with virtual calls in `TValueHolder`, integrated in the commit 396d98fb36f9f6ea3a15d08ee118d5a4452d3d60. Average speed: 254-257 MiB/s (33.1-33.6 sec spent) There seems to be no performance difference in terms of upload speed. The only difference is that the percental fraction of CPU load that `TCsvParser::BuildListOnArena` method takes: 1. this commit: 42.7% of the total CPU time. 2. commit 396d98fb36: 43.1% of the total CPU time. 3. without both commits: 49.3% of the total CPU time. --- ydb/public/sdk/cpp/client/ydb_value/fwd.h | 1 + .../ydb-cpp-sdk/client/params/params.h | 2 +- .../include/ydb-cpp-sdk/client/value/fwd.h | 2 +- .../include/ydb-cpp-sdk/client/value/value.h | 68 ++- .../sdk/cpp/src/client/params/params.cpp | 2 +- ydb/public/sdk/cpp/src/client/value/value.cpp | 498 ++++++++---------- 6 files changed, 280 insertions(+), 293 deletions(-) diff --git a/ydb/public/sdk/cpp/client/ydb_value/fwd.h b/ydb/public/sdk/cpp/client/ydb_value/fwd.h index f9f5df1abf5c..d1b26e0091fa 100644 --- a/ydb/public/sdk/cpp/client/ydb_value/fwd.h +++ b/ydb/public/sdk/cpp/client/ydb_value/fwd.h @@ -8,6 +8,7 @@ class TTypeBuilder; class TValue; class TValueParser; class TValueBuilder; +class TArenaAllocatedValueBuilder; template class TValueBuilderBase; diff --git a/ydb/public/sdk/cpp/include/ydb-cpp-sdk/client/params/params.h b/ydb/public/sdk/cpp/include/ydb-cpp-sdk/client/params/params.h index 71749a9c1adb..31a0d904a821 100644 --- a/ydb/public/sdk/cpp/include/ydb-cpp-sdk/client/params/params.h +++ b/ydb/public/sdk/cpp/include/ydb-cpp-sdk/client/params/params.h @@ -60,7 +60,7 @@ class TParams { std::shared_ptr Impl_; }; -class TParamValueBuilder : public TValueBuilderBase { +class TParamValueBuilder : public TValueBuilderBase { friend class TParamsBuilder; public: TParamsBuilder& Build(); diff --git a/ydb/public/sdk/cpp/include/ydb-cpp-sdk/client/value/fwd.h b/ydb/public/sdk/cpp/include/ydb-cpp-sdk/client/value/fwd.h index 9205080b7f7e..24a6913987b7 100644 --- a/ydb/public/sdk/cpp/include/ydb-cpp-sdk/client/value/fwd.h +++ b/ydb/public/sdk/cpp/include/ydb-cpp-sdk/client/value/fwd.h @@ -10,7 +10,7 @@ class TValueParser; class TValueBuilder; class TArenaAllocatedValueBuilder; -template +template class TValueBuilderBase; } // namespace NYdb diff --git a/ydb/public/sdk/cpp/include/ydb-cpp-sdk/client/value/value.h b/ydb/public/sdk/cpp/include/ydb-cpp-sdk/client/value/value.h index c4aa1792a55c..f863d4bd81e9 100644 --- a/ydb/public/sdk/cpp/include/ydb-cpp-sdk/client/value/value.h +++ b/ydb/public/sdk/cpp/include/ydb-cpp-sdk/client/value/value.h @@ -7,6 +7,7 @@ #include #include +#include namespace Ydb { class Type; @@ -173,6 +174,7 @@ std::string FormatType(const TType& type); //! To create complex type, corresponding scope should be opened by Begin*/End* calls //! To create complex repeated type, Add* should be called at least once class TTypeBuilder : public TMoveOnly { + template friend class TValueBuilderImpl; public: TTypeBuilder(TTypeBuilder&&); @@ -423,9 +425,59 @@ class TValueParser : public TMoveOnly { std::unique_ptr Impl_; }; +template class TValueBuilderImpl; -template + +class StackAllocatedValueHolder { +public: + Ydb::Value& Value() { + return ProtoValue_; + } + + Ydb::Value* ValuePtr() { + return &ProtoValue_; + } + +private: + Ydb::Value ProtoValue_; +}; + +class ArenaAllocatedValueHolder { +public: + ArenaAllocatedValueHolder(google::protobuf::Arena* arena) + // value is created lazily on first access + : ArenaAllocatedValue_(nullptr) + , Arena_(arena) { + Y_ASSERT(arena != nullptr); + } + + Ydb::Value& Value() { + createValueIfAbsent(); + return *ArenaAllocatedValue_; + } + + Ydb::Value* ValuePtr() { + createValueIfAbsent(); + return ArenaAllocatedValue_; + } + +private: + void createValueIfAbsent() { + if (ArenaAllocatedValue_ == nullptr) { + ArenaAllocatedValue_ = google::protobuf::Arena::CreateMessage(Arena_); + } + } + +private: + Ydb::Value* ArenaAllocatedValue_; + google::protobuf::Arena* Arena_; +}; + + + + +template class TValueBuilderBase : public TMoveOnly { friend TDerived; public: @@ -538,23 +590,21 @@ class TValueBuilderBase : public TMoveOnly { protected: TValueBuilderBase(TValueBuilderBase&&); - TValueBuilderBase(); - - TValueBuilderBase(google::protobuf::Arena* arena); + TValueBuilderBase(ValueHolder holder); - TValueBuilderBase(const TType& type); + TValueBuilderBase(ValueHolder holder, const TType& type); - TValueBuilderBase(Ydb::Type& type, Ydb::Value& value); + TValueBuilderBase(ValueHolder holder, Ydb::Type& type, Ydb::Value& value); ~TValueBuilderBase(); void CheckValue(); private: - std::unique_ptr Impl_; + std::unique_ptr> Impl_; }; -class TValueBuilder : public TValueBuilderBase { +class TValueBuilder : public TValueBuilderBase { public: TValueBuilder(); @@ -563,7 +613,7 @@ class TValueBuilder : public TValueBuilderBase { TValue Build(); }; -class TArenaAllocatedValueBuilder : public TValueBuilderBase { +class TArenaAllocatedValueBuilder : public TValueBuilderBase { public: TArenaAllocatedValueBuilder(google::protobuf::Arena* arena); diff --git a/ydb/public/sdk/cpp/src/client/params/params.cpp b/ydb/public/sdk/cpp/src/client/params/params.cpp index 25cb1a71c93c..87aec4f6e38a 100644 --- a/ydb/public/sdk/cpp/src/client/params/params.cpp +++ b/ydb/public/sdk/cpp/src/client/params/params.cpp @@ -130,7 +130,7 @@ class TParamsBuilder::TImpl { //////////////////////////////////////////////////////////////////////////////// TParamValueBuilder::TParamValueBuilder(TParamsBuilder& owner, Ydb::Type& typeProto, Ydb::Value& valueProto) - : TValueBuilderBase(typeProto, valueProto) + : TValueBuilderBase(StackAllocatedValueHolder(), typeProto, valueProto) , Owner_(owner) , Finished_(false) {} diff --git a/ydb/public/sdk/cpp/src/client/value/value.cpp b/ydb/public/sdk/cpp/src/client/value/value.cpp index 59409f9d0649..d3fa0ab97eb5 100644 --- a/ydb/public/sdk/cpp/src/client/value/value.cpp +++ b/ydb/public/sdk/cpp/src/client/value/value.cpp @@ -2066,59 +2066,8 @@ void TValueParser::CloseTagged() { //////////////////////////////////////////////////////////////////////////////// -class TValueHolder { -public: - virtual Ydb::Value& Value() = 0; - virtual Ydb::Value* ValuePtr() = 0; - virtual ~TValueHolder() = default; -}; - -class StackAllocatedValueHolder : public TValueHolder { -public: - Ydb::Value& Value() override { - return ProtoValue_; - } - - Ydb::Value* ValuePtr() override { - return &ProtoValue_; - } - -private: - Ydb::Value ProtoValue_; -}; - -class ArenaAllocatedValueHolder : public TValueHolder { -public: - ArenaAllocatedValueHolder(google::protobuf::Arena* arena) - // value is created lazily on first access - : ArenaAllocatedValue_(nullptr) - , Arena_(arena) { - Y_ASSERT(arena != nullptr); - } - - Ydb::Value& Value() override { - createValueIfAbsent(); - return *ArenaAllocatedValue_; - } - - Ydb::Value* ValuePtr() override { - createValueIfAbsent(); - return ArenaAllocatedValue_; - } - -private: - void createValueIfAbsent() { - if (ArenaAllocatedValue_ == nullptr) { - ArenaAllocatedValue_ = google::protobuf::Arena::CreateMessage(Arena_); - } - } - -private: - Ydb::Value* ArenaAllocatedValue_; - google::protobuf::Arena* Arena_; -}; - // TODO: if TValueArenaBuilderImpl implemented, we can avoid copying Ydb::Value in TCsvParser::BuildListOnArena when FieldToValue is called +template class TValueBuilderImpl { using ETypeKind = TTypeParser::ETypeKind; using TMembersMap = std::map; @@ -2142,31 +2091,24 @@ class TValueBuilderImpl { }; public: - TValueBuilderImpl() - : TypeBuilder_() - , ValueHolder_(std::make_unique()) - { - PushPath(ValueHolder_->Value()); - } - - TValueBuilderImpl(google::protobuf::Arena* arena) + TValueBuilderImpl(ValueHolder holder) : TypeBuilder_() - , ValueHolder_(std::make_unique(arena)) + , ValueHolder_(std::move(holder)) { - PushPath(ValueHolder_->Value()); + PushPath(ValueHolder_.Value()); } - TValueBuilderImpl(const TType& type) + TValueBuilderImpl(ValueHolder holder, const TType& type) : TypeBuilder_() - , ValueHolder_(std::make_unique()) + , ValueHolder_(std::move(holder)) { - PushPath(ValueHolder_->Value()); + PushPath(ValueHolder_.Value()); GetType().CopyFrom(type.GetProto()); } - TValueBuilderImpl(Ydb::Type& type, Ydb::Value& value) + TValueBuilderImpl(ValueHolder holder, Ydb::Type& type, Ydb::Value& value) : TypeBuilder_(type) - , ValueHolder_(std::make_unique()) + , ValueHolder_(std::move(holder)) { PushPath(value); } @@ -2182,14 +2124,14 @@ class TValueBuilderImpl { CheckValue(); Ydb::Value value; - value.Swap(&ValueHolder_->Value()); + value.Swap(&ValueHolder_.Value()); return TValue(TypeBuilder_.Build(), std::move(value)); } TArenaAllocatedValue BuildArenaAllocatedValue() { CheckValue(); - return TArenaAllocatedValue(TypeBuilder_.Build(), ValueHolder_->ValuePtr()); + return TArenaAllocatedValue(TypeBuilder_.Build(), ValueHolder_.ValuePtr()); } void Bool(bool value) { @@ -2902,7 +2844,7 @@ class TValueBuilderImpl { //TTypeBuilder TypeBuilder_; TTypeBuilder::TImpl TypeBuilder_; // Ydb::Value ProtoValue_; - std::unique_ptr ValueHolder_; + ValueHolder ValueHolder_; std::map StructsMap_; TStackVec Path_; @@ -2911,217 +2853,211 @@ class TValueBuilderImpl { //////////////////////////////////////////////////////////////////////////////// -template -TValueBuilderBase::TValueBuilderBase(TValueBuilderBase&&) = default; - -template -TValueBuilderBase::~TValueBuilderBase() = default; - -template -TValueBuilderBase::TValueBuilderBase() - : Impl_(new TValueBuilderImpl()) {} - +template +TValueBuilderBase::TValueBuilderBase(TValueBuilderBase&&) = default; -template -TValueBuilderBase::TValueBuilderBase(google::protobuf::Arena* arena) - : Impl_(new TValueBuilderImpl(arena)) {} +template +TValueBuilderBase::~TValueBuilderBase() = default; +template +TValueBuilderBase::TValueBuilderBase(ValueHolder holder) + : Impl_(new TValueBuilderImpl(std::move(holder))) {} -template -TValueBuilderBase::TValueBuilderBase(const TType& type) - : Impl_(new TValueBuilderImpl(type)) {} +template +TValueBuilderBase::TValueBuilderBase(ValueHolder holder, const TType& type) + : Impl_(new TValueBuilderImpl(std::move(holder), type)) {} -template -TValueBuilderBase::TValueBuilderBase(Ydb::Type& type, Ydb::Value& value) - : Impl_(new TValueBuilderImpl(type, value)) {} +template +TValueBuilderBase::TValueBuilderBase(ValueHolder holder, Ydb::Type& type, Ydb::Value& value) + : Impl_(new TValueBuilderImpl(std::move(holder), type, value)) {} -template -void TValueBuilderBase::CheckValue() { +template +void TValueBuilderBase::CheckValue() { return Impl_->CheckValue(); } -template -TDerived& TValueBuilderBase::Bool(bool value) { +template +TDerived& TValueBuilderBase::Bool(bool value) { Impl_->Bool(value); return static_cast(*this); } -template -TDerived& TValueBuilderBase::Int8(i8 value) { +template +TDerived& TValueBuilderBase::Int8(i8 value) { Impl_->Int8(value); return static_cast(*this); } -template -TDerived& TValueBuilderBase::Uint8(ui8 value) { +template +TDerived& TValueBuilderBase::Uint8(ui8 value) { Impl_->Uint8(value); return static_cast(*this); } -template -TDerived& TValueBuilderBase::Int16(i16 value) { +template +TDerived& TValueBuilderBase::Int16(i16 value) { Impl_->Int16(value); return static_cast(*this); } -template -TDerived& TValueBuilderBase::Uint16(ui16 value) { +template +TDerived& TValueBuilderBase::Uint16(ui16 value) { Impl_->Uint16(value); return static_cast(*this); } -template -TDerived& TValueBuilderBase::Int32(i32 value) { +template +TDerived& TValueBuilderBase::Int32(i32 value) { Impl_->Int32(value); return static_cast(*this); } -template -TDerived& TValueBuilderBase::Uint32(ui32 value) { +template +TDerived& TValueBuilderBase::Uint32(ui32 value) { Impl_->Uint32(value); return static_cast(*this); } -template -TDerived& TValueBuilderBase::Int64(int64_t value) { +template +TDerived& TValueBuilderBase::Int64(int64_t value) { Impl_->Int64(value); return static_cast(*this); } -template -TDerived& TValueBuilderBase::Uint64(uint64_t value) { +template +TDerived& TValueBuilderBase::Uint64(uint64_t value) { Impl_->Uint64(value); return static_cast(*this); } -template -TDerived& TValueBuilderBase::Float(float value) { +template +TDerived& TValueBuilderBase::Float(float value) { Impl_->Float(value); return static_cast(*this); } -template -TDerived& TValueBuilderBase::Double(double value) { +template +TDerived& TValueBuilderBase::Double(double value) { Impl_->Double(value); return static_cast(*this); } -template -TDerived& TValueBuilderBase::Date(const TInstant& value) { +template +TDerived& TValueBuilderBase::Date(const TInstant& value) { Impl_->Date(value); return static_cast(*this); } -template -TDerived& TValueBuilderBase::Datetime(const TInstant& value) { +template +TDerived& TValueBuilderBase::Datetime(const TInstant& value) { Impl_->Datetime(value); return static_cast(*this); } -template -TDerived& TValueBuilderBase::Timestamp(const TInstant& value) { +template +TDerived& TValueBuilderBase::Timestamp(const TInstant& value) { Impl_->Timestamp(value); return static_cast(*this); } -template -TDerived& TValueBuilderBase::Interval(int64_t value) { +template +TDerived& TValueBuilderBase::Interval(int64_t value) { Impl_->Interval(value); return static_cast(*this); } -template -TDerived& TValueBuilderBase::Date32(const i32 value) { +template +TDerived& TValueBuilderBase::Date32(const i32 value) { Impl_->Date32(value); return static_cast(*this); } -template -TDerived& TValueBuilderBase::Datetime64(const int64_t value) { +template +TDerived& TValueBuilderBase::Datetime64(const int64_t value) { Impl_->Datetime64(value); return static_cast(*this); } -template -TDerived& TValueBuilderBase::Timestamp64(const int64_t value) { +template +TDerived& TValueBuilderBase::Timestamp64(const int64_t value) { Impl_->Timestamp64(value); return static_cast(*this); } -template -TDerived& TValueBuilderBase::Interval64(const int64_t value) { +template +TDerived& TValueBuilderBase::Interval64(const int64_t value) { Impl_->Interval64(value); return static_cast(*this); } -template -TDerived& TValueBuilderBase::TzDate(const std::string& value) { +template +TDerived& TValueBuilderBase::TzDate(const std::string& value) { Impl_->TzDate(value); return static_cast(*this); } -template -TDerived& TValueBuilderBase::TzDatetime(const std::string& value) { +template +TDerived& TValueBuilderBase::TzDatetime(const std::string& value) { Impl_->TzDatetime(value); return static_cast(*this); } -template -TDerived& TValueBuilderBase::TzTimestamp(const std::string& value) { +template +TDerived& TValueBuilderBase::TzTimestamp(const std::string& value) { Impl_->TzTimestamp(value); return static_cast(*this); } -template -TDerived& TValueBuilderBase::String(const std::string& value) { +template +TDerived& TValueBuilderBase::String(const std::string& value) { Impl_->String(value); return static_cast(*this); } -template -TDerived& TValueBuilderBase::Utf8(const std::string& value) { +template +TDerived& TValueBuilderBase::Utf8(const std::string& value) { Impl_->Utf8(value); return static_cast(*this); } -template -TDerived& TValueBuilderBase::Yson(const std::string& value) { +template +TDerived& TValueBuilderBase::Yson(const std::string& value) { Impl_->Yson(value); return static_cast(*this); } -template -TDerived& TValueBuilderBase::Json(const std::string& value) { +template +TDerived& TValueBuilderBase::Json(const std::string& value) { Impl_->Json(value); return static_cast(*this); } -template -TDerived& TValueBuilderBase::Uuid(const TUuidValue& value) { +template +TDerived& TValueBuilderBase::Uuid(const TUuidValue& value) { Impl_->Uuid(value); return static_cast(*this); } -template -TDerived& TValueBuilderBase::JsonDocument(const std::string& value) { +template +TDerived& TValueBuilderBase::JsonDocument(const std::string& value) { Impl_->JsonDocument(value); return static_cast(*this); } -template -TDerived& TValueBuilderBase::DyNumber(const std::string& value) { +template +TDerived& TValueBuilderBase::DyNumber(const std::string& value) { Impl_->DyNumber(value); return static_cast(*this); } -template -TDerived& TValueBuilderBase::Decimal(const TDecimalValue& value) { +template +TDerived& TValueBuilderBase::Decimal(const TDecimalValue& value) { Impl_->Decimal(value); return static_cast(*this); } -template -TDerived& TValueBuilderBase::Pg(const TPgValue& value) { +template +TDerived& TValueBuilderBase::Pg(const TPgValue& value) { Impl_->Pg(value); return static_cast(*this); } @@ -3142,357 +3078,357 @@ TDerived& TValueBuilderBase::Pg(const TPgValue& value) { Impl_->EndOptional(); \ return static_cast(*this); -template -TDerived& TValueBuilderBase::OptionalBool(const std::optional& value) { +template +TDerived& TValueBuilderBase::OptionalBool(const std::optional& value) { SET_OPT_VALUE_MAYBE(Bool); } -template -TDerived& TValueBuilderBase::OptionalInt8(const std::optional& value) { +template +TDerived& TValueBuilderBase::OptionalInt8(const std::optional& value) { SET_OPT_VALUE_MAYBE(Int8); } -template -TDerived& TValueBuilderBase::OptionalUint8(const std::optional& value) { +template +TDerived& TValueBuilderBase::OptionalUint8(const std::optional& value) { SET_OPT_VALUE_MAYBE(Uint8); } -template -TDerived& TValueBuilderBase::OptionalInt16(const std::optional& value) { +template +TDerived& TValueBuilderBase::OptionalInt16(const std::optional& value) { SET_OPT_VALUE_MAYBE(Int16); } -template -TDerived& TValueBuilderBase::OptionalUint16(const std::optional& value) { +template +TDerived& TValueBuilderBase::OptionalUint16(const std::optional& value) { SET_OPT_VALUE_MAYBE(Uint16); } -template -TDerived& TValueBuilderBase::OptionalInt32(const std::optional& value) { +template +TDerived& TValueBuilderBase::OptionalInt32(const std::optional& value) { SET_OPT_VALUE_MAYBE(Int32); } -template -TDerived& TValueBuilderBase::OptionalUint32(const std::optional& value) { +template +TDerived& TValueBuilderBase::OptionalUint32(const std::optional& value) { SET_OPT_VALUE_MAYBE(Uint32); } -template -TDerived& TValueBuilderBase::OptionalInt64(const std::optional& value) { +template +TDerived& TValueBuilderBase::OptionalInt64(const std::optional& value) { SET_OPT_VALUE_MAYBE(Int64); } -template -TDerived& TValueBuilderBase::OptionalUint64(const std::optional& value) { +template +TDerived& TValueBuilderBase::OptionalUint64(const std::optional& value) { SET_OPT_VALUE_MAYBE(Uint64); } -template -TDerived& TValueBuilderBase::OptionalFloat(const std::optional& value) { +template +TDerived& TValueBuilderBase::OptionalFloat(const std::optional& value) { SET_OPT_VALUE_MAYBE(Float); } -template -TDerived& TValueBuilderBase::OptionalDouble(const std::optional& value) { +template +TDerived& TValueBuilderBase::OptionalDouble(const std::optional& value) { SET_OPT_VALUE_MAYBE(Double); } -template -TDerived& TValueBuilderBase::OptionalDate(const std::optional& value) { +template +TDerived& TValueBuilderBase::OptionalDate(const std::optional& value) { SET_OPT_VALUE_MAYBE(Date); } -template -TDerived& TValueBuilderBase::OptionalDatetime(const std::optional& value) { +template +TDerived& TValueBuilderBase::OptionalDatetime(const std::optional& value) { SET_OPT_VALUE_MAYBE(Datetime); } -template -TDerived& TValueBuilderBase::OptionalTimestamp(const std::optional& value) { +template +TDerived& TValueBuilderBase::OptionalTimestamp(const std::optional& value) { SET_OPT_VALUE_MAYBE(Timestamp); } -template -TDerived& TValueBuilderBase::OptionalInterval(const std::optional& value) { +template +TDerived& TValueBuilderBase::OptionalInterval(const std::optional& value) { SET_OPT_VALUE_MAYBE(Interval); } -template -TDerived& TValueBuilderBase::OptionalDate32(const std::optional& value) { +template +TDerived& TValueBuilderBase::OptionalDate32(const std::optional& value) { SET_OPT_VALUE_MAYBE(Date32); } -template -TDerived& TValueBuilderBase::OptionalDatetime64(const std::optional& value) { +template +TDerived& TValueBuilderBase::OptionalDatetime64(const std::optional& value) { SET_OPT_VALUE_MAYBE(Datetime64); } -template -TDerived& TValueBuilderBase::OptionalTimestamp64(const std::optional& value) { +template +TDerived& TValueBuilderBase::OptionalTimestamp64(const std::optional& value) { SET_OPT_VALUE_MAYBE(Timestamp64); } -template -TDerived& TValueBuilderBase::OptionalInterval64(const std::optional& value) { +template +TDerived& TValueBuilderBase::OptionalInterval64(const std::optional& value) { SET_OPT_VALUE_MAYBE(Interval64); } -template -TDerived& TValueBuilderBase::OptionalTzDate(const std::optional& value) { +template +TDerived& TValueBuilderBase::OptionalTzDate(const std::optional& value) { SET_OPT_VALUE_MAYBE(TzDate); } -template -TDerived& TValueBuilderBase::OptionalTzDatetime(const std::optional& value) { +template +TDerived& TValueBuilderBase::OptionalTzDatetime(const std::optional& value) { SET_OPT_VALUE_MAYBE(TzDatetime); } -template -TDerived& TValueBuilderBase::OptionalTzTimestamp(const std::optional& value) { +template +TDerived& TValueBuilderBase::OptionalTzTimestamp(const std::optional& value) { SET_OPT_VALUE_MAYBE(TzTimestamp); } -template -TDerived& TValueBuilderBase::OptionalString(const std::optional& value) { +template +TDerived& TValueBuilderBase::OptionalString(const std::optional& value) { SET_OPT_VALUE_MAYBE(String); } -template -TDerived& TValueBuilderBase::OptionalUtf8(const std::optional& value) { +template +TDerived& TValueBuilderBase::OptionalUtf8(const std::optional& value) { SET_OPT_VALUE_MAYBE(Utf8); } -template -TDerived& TValueBuilderBase::OptionalYson(const std::optional& value) { +template +TDerived& TValueBuilderBase::OptionalYson(const std::optional& value) { SET_OPT_VALUE_MAYBE(Yson); } -template -TDerived& TValueBuilderBase::OptionalJson(const std::optional& value) { +template +TDerived& TValueBuilderBase::OptionalJson(const std::optional& value) { SET_OPT_VALUE_MAYBE(Json); } -template -TDerived& TValueBuilderBase::OptionalUuid(const std::optional& value) { +template +TDerived& TValueBuilderBase::OptionalUuid(const std::optional& value) { SET_OPT_VALUE_MAYBE(Uuid); } -template -TDerived& TValueBuilderBase::OptionalJsonDocument(const std::optional& value) { +template +TDerived& TValueBuilderBase::OptionalJsonDocument(const std::optional& value) { SET_OPT_VALUE_MAYBE(JsonDocument); } -template -TDerived& TValueBuilderBase::OptionalDyNumber(const std::optional& value) { +template +TDerived& TValueBuilderBase::OptionalDyNumber(const std::optional& value) { SET_OPT_VALUE_MAYBE(DyNumber); } -template -TDerived& TValueBuilderBase::BeginOptional() { +template +TDerived& TValueBuilderBase::BeginOptional() { Impl_->BeginOptional(); return static_cast(*this); } -template -TDerived& TValueBuilderBase::EndOptional() { +template +TDerived& TValueBuilderBase::EndOptional() { Impl_->EndOptional(); return static_cast(*this); } -template -TDerived& TValueBuilderBase::EmptyOptional(const TType& itemType) { +template +TDerived& TValueBuilderBase::EmptyOptional(const TType& itemType) { Impl_->EmptyOptional(itemType); return static_cast(*this); } -template -TDerived& TValueBuilderBase::EmptyOptional(EPrimitiveType itemType) { +template +TDerived& TValueBuilderBase::EmptyOptional(EPrimitiveType itemType) { Impl_->EmptyOptional(itemType); return static_cast(*this); } -template -TDerived& TValueBuilderBase::EmptyOptional() { +template +TDerived& TValueBuilderBase::EmptyOptional() { Impl_->EmptyOptional(); return static_cast(*this); } -template -TDerived& TValueBuilderBase::BeginList() { +template +TDerived& TValueBuilderBase::BeginList() { Impl_->BeginList(); return static_cast(*this); } -template -TDerived& TValueBuilderBase::EndList() { +template +TDerived& TValueBuilderBase::EndList() { Impl_->EndList(); return static_cast(*this); } -template -TDerived& TValueBuilderBase::AddListItem() { +template +TDerived& TValueBuilderBase::AddListItem() { Impl_->AddListItem(); return static_cast(*this); } -template -TDerived& TValueBuilderBase::AddListItem(const TValue& itemValue) { +template +TDerived& TValueBuilderBase::AddListItem(const TValue& itemValue) { Impl_->AddListItem(itemValue); return static_cast(*this); } -template -TDerived& TValueBuilderBase::AddListItem(TValue&& itemValue) { +template +TDerived& TValueBuilderBase::AddListItem(TValue&& itemValue) { Impl_->AddListItem(std::move(itemValue)); return static_cast(*this); } -template -TDerived& TValueBuilderBase::EmptyList(const TType& itemType) { +template +TDerived& TValueBuilderBase::EmptyList(const TType& itemType) { Impl_->EmptyList(itemType); return static_cast(*this); } -template -TDerived& TValueBuilderBase::EmptyList() { +template +TDerived& TValueBuilderBase::EmptyList() { Impl_->EmptyList(); return static_cast(*this); } -template -TDerived& TValueBuilderBase::BeginStruct() { +template +TDerived& TValueBuilderBase::BeginStruct() { Impl_->BeginStruct(); return static_cast(*this); } -template -TDerived& TValueBuilderBase::AddMember(const std::string& memberName) { +template +TDerived& TValueBuilderBase::AddMember(const std::string& memberName) { Impl_->AddMember(memberName); return static_cast(*this); } -template -TDerived& TValueBuilderBase::AddMember(const std::string& memberName, const TValue& memberValue) { +template +TDerived& TValueBuilderBase::AddMember(const std::string& memberName, const TValue& memberValue) { Impl_->AddMember(memberName, memberValue); return static_cast(*this); } -template -TDerived& TValueBuilderBase::AddMember(const std::string& memberName, TValue&& memberValue) { +template +TDerived& TValueBuilderBase::AddMember(const std::string& memberName, TValue&& memberValue) { Impl_->AddMember(memberName, std::move(memberValue)); return static_cast(*this); } -template -TDerived& TValueBuilderBase::EndStruct() { +template +TDerived& TValueBuilderBase::EndStruct() { Impl_->EndStruct(); return static_cast(*this); } -template -TDerived& TValueBuilderBase::BeginTuple() { +template +TDerived& TValueBuilderBase::BeginTuple() { Impl_->BeginTuple(); return static_cast(*this); } -template -TDerived& TValueBuilderBase::AddElement() { +template +TDerived& TValueBuilderBase::AddElement() { Impl_->AddElement(); return static_cast(*this); } -template -TDerived& TValueBuilderBase::AddElement(const TValue& elementValue) { +template +TDerived& TValueBuilderBase::AddElement(const TValue& elementValue) { Impl_->AddElement(elementValue); return static_cast(*this); } -template -TDerived& TValueBuilderBase::EndTuple() { +template +TDerived& TValueBuilderBase::EndTuple() { Impl_->EndTuple(); return static_cast(*this); } -template -TDerived& TValueBuilderBase::BeginDict() { +template +TDerived& TValueBuilderBase::BeginDict() { Impl_->BeginDict(); return static_cast(*this); } -template -TDerived& TValueBuilderBase::AddDictItem() { +template +TDerived& TValueBuilderBase::AddDictItem() { Impl_->AddDictItem(); return static_cast(*this); } -template -TDerived& TValueBuilderBase::DictKey() { +template +TDerived& TValueBuilderBase::DictKey() { Impl_->DictKey(); return static_cast(*this); } -template -TDerived& TValueBuilderBase::DictKey(const TValue& keyValue) { +template +TDerived& TValueBuilderBase::DictKey(const TValue& keyValue) { Impl_->DictKey(keyValue); return static_cast(*this); } -template -TDerived& TValueBuilderBase::DictPayload() { +template +TDerived& TValueBuilderBase::DictPayload() { Impl_->DictPayload(); return static_cast(*this); } -template -TDerived& TValueBuilderBase::DictPayload(const TValue& payloadValue) { +template +TDerived& TValueBuilderBase::DictPayload(const TValue& payloadValue) { Impl_->DictPayload(payloadValue); return static_cast(*this); } -template -TDerived& TValueBuilderBase::EndDict() { +template +TDerived& TValueBuilderBase::EndDict() { Impl_->EndDict(); return static_cast(*this); } -template -TDerived& TValueBuilderBase::EmptyDict(const TType& keyType, const TType& payloadType) { +template +TDerived& TValueBuilderBase::EmptyDict(const TType& keyType, const TType& payloadType) { Impl_->EmptyDict(keyType, payloadType); return static_cast(*this); } -template -TDerived& TValueBuilderBase::EmptyDict() { +template +TDerived& TValueBuilderBase::EmptyDict() { Impl_->EmptyDict(); return static_cast(*this); } -template -TDerived& TValueBuilderBase::BeginTagged(const std::string& tag) { +template +TDerived& TValueBuilderBase::BeginTagged(const std::string& tag) { Impl_->BeginTagged(tag); return static_cast(*this); } -template -TDerived& TValueBuilderBase::EndTagged() { +template +TDerived& TValueBuilderBase::EndTagged() { Impl_->EndTagged(); return static_cast(*this); } //////////////////////////////////////////////////////////////////////////////// -template class TValueBuilderBase; -template class TValueBuilderBase; -template class TValueBuilderBase; +template class TValueBuilderBase; +template class TValueBuilderBase; +template class TValueBuilderBase; //////////////////////////////////////////////////////////////////////////////// TValueBuilder::TValueBuilder() - : TValueBuilderBase() {} + : TValueBuilderBase(StackAllocatedValueHolder()) {} TValueBuilder::TValueBuilder(const TType& type) - : TValueBuilderBase(type) {} + : TValueBuilderBase(StackAllocatedValueHolder(), type) {} TValue TValueBuilder::Build() { return Impl_->BuildStackAllocatedValue(); @@ -3501,7 +3437,7 @@ TValue TValueBuilder::Build() { //////////////////////////////////////////////////////////////////////////////// TArenaAllocatedValueBuilder::TArenaAllocatedValueBuilder(google::protobuf::Arena* arena) - : TValueBuilderBase(arena) {} + : TValueBuilderBase(ArenaAllocatedValueHolder(arena)) {} TArenaAllocatedValue TArenaAllocatedValueBuilder::BuildArenaAllocatedValue() { return Impl_->BuildArenaAllocatedValue(); From 12bd12b9f0835f9db40b6931e19308ae8d67f4a8 Mon Sep 17 00:00:00 2001 From: Vladislav Artiukhov Date: Fri, 23 May 2025 20:52:22 +0000 Subject: [PATCH 18/35] feat: output upload metrics for extraction in evaluation pipeline --- ydb/public/lib/ydb_cli/import/import.cpp | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/ydb/public/lib/ydb_cli/import/import.cpp b/ydb/public/lib/ydb_cli/import/import.cpp index 518114f94461..7414b1fe4edc 100644 --- a/ydb/public/lib/ydb_cli/import/import.cpp +++ b/ydb/public/lib/ydb_cli/import/import.cpp @@ -855,6 +855,15 @@ TStatus TImportFileClient::TImpl::Import(const TVector& filePaths, cons std::cerr << "Elapsed: " << std::setprecision(3) << duration.SecondsFloat() << " sec. Total read size: " << PrettifyBytes(TotalBytesRead) << ". Average processing speed: " << PrettifyBytes((double)TotalBytesRead / duration.SecondsFloat()) << "/s." << std::endl; + + // print strcture for the pipeline: + // elapsed_time_sec: [float] + // total_read_size_byte: [integer] + // avg_processing_speed_bytes_per_sec: [integer] + std::cout << "elapsed_time_sec: " << std::setprecision(5) << duration.SecondsFloat() << std::endl; + std::cout << "total_read_size_byte: " << TotalBytesRead << std::endl; + std::cout << "avg_processing_speed_MiB_per_sec: " << std::setprecision(5) + << ((double)TotalBytesRead / duration.SecondsFloat()) / (1024 * 1024) << std::endl; } // Removing all progress files that were a part of this import From 65999d78acb9df86fe73a24b0975fb3d25b9ea52 Mon Sep 17 00:00:00 2001 From: Vladislav Artiukhov Date: Sun, 8 Jun 2025 12:25:11 +0000 Subject: [PATCH 19/35] refactor: remove modifications in SDK related to TValueBuilder --- ydb/public/lib/ydb_cli/common/csv_parser.cpp | 456 +----------------- ydb/public/lib/ydb_cli/common/csv_parser.h | 3 +- ydb/public/sdk/cpp/client/ydb_value/fwd.h | 1 - .../ydb-cpp-sdk/client/params/params.h | 2 +- .../include/ydb-cpp-sdk/client/value/fwd.h | 3 +- .../include/ydb-cpp-sdk/client/value/value.h | 72 +-- .../sdk/cpp/src/client/params/params.cpp | 2 +- ydb/public/sdk/cpp/src/client/value/value.cpp | 445 ++++++++--------- 8 files changed, 230 insertions(+), 754 deletions(-) diff --git a/ydb/public/lib/ydb_cli/common/csv_parser.cpp b/ydb/public/lib/ydb_cli/common/csv_parser.cpp index cfcd3db1110a..2ad122ced30f 100644 --- a/ydb/public/lib/ydb_cli/common/csv_parser.cpp +++ b/ydb/public/lib/ydb_cli/common/csv_parser.cpp @@ -415,414 +415,6 @@ class TCsvToYdbConverter { }; - -// TODO: merge with the impl above -class TCsvToYdbOnArenaConverter { -public: - explicit TCsvToYdbOnArenaConverter(TTypeParser& parser, const std::optional& nullValue, google::protobuf::Arena* arena) - : Parser(parser) - , NullValue(nullValue) - , Builder(arena) {} - - template && std::is_signed_v, std::nullptr_t> = nullptr> - static i64 StringToArithmetic(const TString& token, size_t& cnt) { - return std::stoll(token, &cnt); - } - - template && std::is_unsigned_v, std::nullptr_t> = nullptr> - static ui64 StringToArithmetic(const TString& token, size_t& cnt) { - return std::stoull(token, &cnt); - } - - template , std::nullptr_t> = nullptr> - static float StringToArithmetic(const TString& token, size_t& cnt) { - return std::stof(token, &cnt); - } - - template , std::nullptr_t> = nullptr> - static double StringToArithmetic(const TString& token, size_t& cnt) { - return std::stod(token, &cnt); - } - - template - T GetArithmetic(const TString& token) const { - size_t cnt; - try { - auto value = StringToArithmetic(token, cnt); - if (cnt != token.size() || value < std::numeric_limits::lowest() || value > std::numeric_limits::max()) { - throw yexception(); - } - return static_cast(value); - } catch (std::exception& e) { - throw TCsvParseException() << "Expected " << Parser.GetPrimitive() << " value, received: \"" << token << "\"."; - } - } - - void BuildPrimitive(const TString& token) { - switch (Parser.GetPrimitive()) { - case EPrimitiveType::Int8: - Builder.Int8(GetArithmetic(token)); - break; - case EPrimitiveType::Int16: - Builder.Int16(GetArithmetic(token)); - break; - case EPrimitiveType::Int32: - Builder.Int32(GetArithmetic(token)); - break; - case EPrimitiveType::Int64: - Builder.Int64(GetArithmetic(token)); - break; - case EPrimitiveType::Uint8: - Builder.Uint8(GetArithmetic(token)); - break; - case EPrimitiveType::Uint16: - Builder.Uint16(GetArithmetic(token)); - break; - case EPrimitiveType::Uint32: - Builder.Uint32(GetArithmetic(token)); - break; - case EPrimitiveType::Uint64: - Builder.Uint64(GetArithmetic(token)); - break; - case EPrimitiveType::Bool: - Builder.Bool(GetBool(token)); - break; - case EPrimitiveType::String: - Builder.String(token); - break; - case EPrimitiveType::Utf8: - Builder.Utf8(token); - break; - case EPrimitiveType::Json: - Builder.Json(token); - break; - case EPrimitiveType::JsonDocument: - Builder.JsonDocument(token); - break; - case EPrimitiveType::Yson: - Builder.Yson(token); - break; - case EPrimitiveType::Uuid: - Builder.Uuid(TUuidValue{token}); - break; - case EPrimitiveType::Float: - Builder.Float(GetArithmetic(token)); - break; - case EPrimitiveType::Double: - Builder.Double(GetArithmetic(token)); - break; - case EPrimitiveType::DyNumber: - Builder.DyNumber(token); - break; - case EPrimitiveType::Date: { - TInstant date; - if (!TInstant::TryParseIso8601(token, date)) { - date = TInstant::Days(GetArithmetic(token)); - } - Builder.Date(date); - break; - } - case EPrimitiveType::Datetime: { - TInstant datetime; - if (!TInstant::TryParseIso8601(token, datetime)) { - datetime = TInstant::Seconds(GetArithmetic(token)); - } - Builder.Datetime(datetime); - break; - } - case EPrimitiveType::Timestamp: { - TInstant timestamp; - if (!TInstant::TryParseIso8601(token, timestamp)) { - timestamp = TInstant::MicroSeconds(GetArithmetic(token)); - } - Builder.Timestamp(timestamp); - break; - } - case EPrimitiveType::Interval: - Builder.Interval(GetArithmetic(token)); - break; - case EPrimitiveType::Date32: { - TInstant date; - if (TInstant::TryParseIso8601(token, date)) { - Builder.Date32(date.Days()); - } else { - Builder.Date32(GetArithmetic(token)); - } - break; - } - case EPrimitiveType::Datetime64: { - TInstant date; - if (TInstant::TryParseIso8601(token, date)) { - Builder.Datetime64(date.Seconds()); - } else { - Builder.Datetime64(GetArithmetic(token)); - } - break; - } - case EPrimitiveType::Timestamp64: { - TInstant date; - if (TInstant::TryParseIso8601(token, date)) { - Builder.Timestamp64(date.MicroSeconds()); - } else { - Builder.Timestamp64(GetArithmetic(token)); - } - break; - } - case EPrimitiveType::Interval64: - Builder.Interval64(GetArithmetic(token)); - break; - case EPrimitiveType::TzDate: - Builder.TzDate(token); - break; - case EPrimitiveType::TzDatetime: - Builder.TzDatetime(token); - break; - case EPrimitiveType::TzTimestamp: - Builder.TzTimestamp(token); - break; - default: - throw TCsvParseException() << "Unsupported primitive type: " << Parser.GetPrimitive(); - } - } - - void BuildValue(const TStringBuf& token) { - switch (Parser.GetKind()) { - case TTypeParser::ETypeKind::Primitive: { - BuildPrimitive(std::string{token}); - break; - } - case TTypeParser::ETypeKind::Decimal: { - Builder.Decimal(TDecimalValue(std::string(token), Parser.GetDecimal().Precision, Parser.GetDecimal().Scale)); - break; - } - case TTypeParser::ETypeKind::Optional: { - Parser.OpenOptional(); - if (NullValue && token == NullValue) { - Builder.EmptyOptional(GetType()); - } else { - Builder.BeginOptional(); - BuildValue(token); - Builder.EndOptional(); - } - Parser.CloseOptional(); - break; - } - case TTypeParser::ETypeKind::Null: { - EnsureNull(token); - break; - } - case TTypeParser::ETypeKind::Void: { - EnsureNull(token); - break; - } - case TTypeParser::ETypeKind::Tagged: { - Parser.OpenTagged(); - Builder.BeginTagged(Parser.GetTag()); - BuildValue(token); - Builder.EndTagged(); - Parser.CloseTagged(); - break; - } - case TTypeParser::ETypeKind::Pg: { - if (NullValue && token == NullValue) { - Builder.Pg(TPgValue(TPgValue::VK_NULL, {}, Parser.GetPg())); - } else { - Builder.Pg(TPgValue(TPgValue::VK_TEXT, TString(token), Parser.GetPg())); - } - break; - } - default: - throw TCsvParseException() << "Unsupported type kind: " << Parser.GetKind(); - } - } - - void BuildType(TTypeBuilder& typeBuilder) { - switch (Parser.GetKind()) { - case TTypeParser::ETypeKind::Primitive: - typeBuilder.Primitive(Parser.GetPrimitive()); - break; - - case TTypeParser::ETypeKind::Decimal: - typeBuilder.Decimal(Parser.GetDecimal()); - break; - - case TTypeParser::ETypeKind::Optional: - Parser.OpenOptional(); - typeBuilder.BeginOptional(); - BuildType(typeBuilder); - typeBuilder.EndOptional(); - Parser.CloseOptional(); - break; - - case TTypeParser::ETypeKind::Tagged: - Parser.OpenTagged(); - typeBuilder.BeginTagged(Parser.GetTag()); - BuildType(typeBuilder); - typeBuilder.EndTagged(); - Parser.CloseTagged(); - break; - - case TTypeParser::ETypeKind::Pg: - typeBuilder.Pg(Parser.GetPg()); - break; - - default: - throw TCsvParseException() << "Unsupported type kind: " << Parser.GetKind(); - } - } - - TType GetType() { - TTypeBuilder typeBuilder; - BuildType(typeBuilder); - return typeBuilder.Build(); - } - - bool GetBool(const TString& token) const { - if (token == "true") { - return true; - } - if (token == "false") { - return false; - } - throw TCsvParseException() << "Expected bool value: \"true\" or \"false\", received: \"" << token << "\"."; - } - - void EnsureNull(const TStringBuf& token) const { - if (!NullValue) { - throw TCsvParseException() << "Expected null value instead of \"" << token << "\", but null value is not set."; - } - if (token != NullValue) { - throw TCsvParseException() << "Expected null value: \"" << NullValue << "\", received: \"" << token << "\"."; - } - } - - template - bool TryParseArithmetic(const TString& token) const { - size_t cnt; - try { - auto value = StringToArithmetic(token, cnt); - if (cnt != token.size() || value < std::numeric_limits::lowest() || value > std::numeric_limits::max()) { - return false; - } - } catch (std::exception& e) { - return false; - } - return true; - } - - bool TryParseBool(const TString& token) const { - TString tokenLowerCase = to_lower(token); - return tokenLowerCase == "true" || tokenLowerCase == "false"; - } - - bool TryParsePrimitive(const TString& token) { - switch (Parser.GetPrimitive()) { - case EPrimitiveType::Uint8: - return TryParseArithmetic(token) && !token.StartsWith('-'); - case EPrimitiveType::Uint16: - return TryParseArithmetic(token) && !token.StartsWith('-');; - case EPrimitiveType::Uint32: - return TryParseArithmetic(token) && !token.StartsWith('-');; - case EPrimitiveType::Uint64: - return TryParseArithmetic(token) && !token.StartsWith('-');; - case EPrimitiveType::Int8: - return TryParseArithmetic(token); - case EPrimitiveType::Int16: - return TryParseArithmetic(token); - case EPrimitiveType::Int32: - return TryParseArithmetic(token); - case EPrimitiveType::Int64: - return TryParseArithmetic(token); - case EPrimitiveType::Bool: - return TryParseBool(token); - case EPrimitiveType::Json: - return token.StartsWith('{') && token.EndsWith('}'); - break; - case EPrimitiveType::JsonDocument: - break; - case EPrimitiveType::Yson: - break; - case EPrimitiveType::Uuid: - static std::regex uuidRegexTemplate("[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}"); - return std::regex_match(token.c_str(), uuidRegexTemplate); - case EPrimitiveType::Float: - return TryParseArithmetic(token); - case EPrimitiveType::Double: - return TryParseArithmetic(token); - case EPrimitiveType::DyNumber: - break; - case EPrimitiveType::Date: { - TInstant date; - return TInstant::TryParseIso8601(token, date) && token.length() <= 10; - } - case EPrimitiveType::Datetime: { - TInstant datetime; - return TInstant::TryParseIso8601(token, datetime) && token.length() <= 19; - } - case EPrimitiveType::Timestamp: { - TInstant timestamp; - return TInstant::TryParseIso8601(token, timestamp) || TryParseArithmetic(token); - } - case EPrimitiveType::Interval: - break; - case EPrimitiveType::Date32: { - TInstant date; - return TInstant::TryParseIso8601(token, date) || TryParseArithmetic(token); - } - case EPrimitiveType::Datetime64: { - TInstant date; - return TInstant::TryParseIso8601(token, date) || TryParseArithmetic(token); - } - case EPrimitiveType::Timestamp64: { - TInstant date; - return TInstant::TryParseIso8601(token, date) || TryParseArithmetic(token); - } - case EPrimitiveType::Interval64: - return TryParseArithmetic(token); - case EPrimitiveType::TzDate: - break; - case EPrimitiveType::TzDatetime: - break; - case EPrimitiveType::TzTimestamp: - break; - default: - throw TCsvParseException() << "Unsupported primitive type: " << Parser.GetPrimitive(); - } - return false; - } - - bool TryParseValue(const TStringBuf& token, TPossibleType& possibleType) { - if (NullValue && token == NullValue) { - possibleType.SetHasNulls(true); - return true; - } - possibleType.SetHasNonNulls(true); - switch (Parser.GetKind()) { - case TTypeParser::ETypeKind::Primitive: { - return TryParsePrimitive(TString(token)); - } - case TTypeParser::ETypeKind::Decimal: { - break; - } - default: - throw TCsvParseException() << "Unsupported type kind: " << Parser.GetKind(); - } - return false; - } - - TArenaAllocatedValue Convert(const TStringBuf& token) { - BuildValue(token); - return Builder.BuildArenaAllocatedValue(); - } - -private: - TTypeParser& Parser; - const std::optional NullValue = ""; - TArenaAllocatedValueBuilder Builder; -}; - - - TCsvParseException FormatError(const std::exception& inputError, const TCsvParser::TParseMetadata& meta, const std::optional& columnName = {}) { @@ -853,23 +445,6 @@ TValue FieldToValue(TTypeParser& parser, } } -TArenaAllocatedValue FieldToValueOnArena( - TTypeParser& parser, - const TStringBuf& token, - const std::optional& nullValue, - const TCsvParser::TParseMetadata& meta, - const std::string& columnName, - google::protobuf::Arena* arena -) { - Y_ASSERT(arena != nullptr); - try { - TCsvToYdbOnArenaConverter converter(parser, nullValue, arena); - return converter.Convert(token); - } catch (std::exception& e) { - throw FormatError(e, meta, columnName); - } -} - bool TryParse(TTypeParser& parser, const TStringBuf& token, const std::optional& nullValue, TPossibleType& possibleType) { try { TCsvToYdbConverter converter(parser, nullValue); @@ -995,7 +570,7 @@ TValue TCsvParser::BuildList(const std::vector& lines, const TString& f listItems->Reserve(lines.size()); for (const auto& line : lines) { - ProcessCsvLine(line, listItems, columnTypeParsers, row, filename, /* arena = */ nullptr); + ProcessCsvLine(line, listItems, columnTypeParsers, row, filename); if (row.has_value()) { ++row.value(); } @@ -1027,7 +602,7 @@ TArenaAllocatedValue TCsvParser::BuildListOnArena( listItems->Reserve(lines.size()); for (const auto& line : lines) { - ProcessCsvLine(line, listItems, columnTypeParsers, row, filename, arena); + ProcessCsvLine(line, listItems, columnTypeParsers, row, filename); if (row.has_value()) { ++row.value(); } @@ -1045,8 +620,7 @@ void TCsvParser::ProcessCsvLine( google::protobuf::RepeatedPtrField* listItems, const std::vector>& columnTypeParsers, std::optional currentRow, - const TString& filename, - google::protobuf::Arena* arena + const TString& filename ) const { NCsvFormat::CsvSplitter splitter(line, Delimeter); auto* structItems = listItems->Add()->mutable_items(); @@ -1064,28 +638,8 @@ void TCsvParser::ProcessCsvLine( } TStringBuf nextField = Consume(splitter, meta, *headerIt); if (!*skipIt) { - // TODO: create Ydb::Value on the arena to avoid copying here - // TODO: (despite std::move, we copy from non-arena allocated Ydb::Value into arena-allocated structItems) - if (arena != nullptr) { - TArenaAllocatedValue builtValue = FieldToValueOnArena( - *typeParserIt->get(), nextField, NullValue, meta, *headerIt, arena); - - // if (print.load()) { - // Cerr << "arena: " << static_cast(arena) << Endl; - // Cerr << "builtValue.GetProto()->GetArena(): " << static_cast(builtValue.GetProto()->GetArena()) << Endl; - // Cerr << "structItems->GetArena(): " << static_cast(structItems->GetArena()) << Endl; - // Cerr << "structItems->Add()->GetArena(): " << static_cast(structItems->Add()->GetArena()) << Endl; - // print.store(false); - // } - // Y_ASSERT(arena != nullptr); - // Y_ASSERT(builtValue.GetProto()->GetArena() != nullptr); - // Y_ASSERT(builtValue.GetProto()->GetArena() == structItems->GetArena()); - - *structItems->Add() = std::move(builtValue).ExtractProto(); - } else { - TValue builtValue = FieldToValue(*typeParserIt->get(), nextField, NullValue, meta, *headerIt); - *structItems->Add() = std::move(builtValue).ExtractProto(); - } + TValue builtValue = FieldToValue(*typeParserIt->get(), nextField, NullValue, meta, *headerIt); + *structItems->Add() = std::move(builtValue).ExtractProto(); ++typeParserIt; } diff --git a/ydb/public/lib/ydb_cli/common/csv_parser.h b/ydb/public/lib/ydb_cli/common/csv_parser.h index d5feebfc1326..b3a0ec89bcc3 100644 --- a/ydb/public/lib/ydb_cli/common/csv_parser.h +++ b/ydb/public/lib/ydb_cli/common/csv_parser.h @@ -110,8 +110,7 @@ class TCsvParser { google::protobuf::RepeatedPtrField* listItems, const std::vector>& columnTypeParsers, std::optional currentRow, - const TString& filename, - google::protobuf::Arena* arena + const TString& filename ) const; }; diff --git a/ydb/public/sdk/cpp/client/ydb_value/fwd.h b/ydb/public/sdk/cpp/client/ydb_value/fwd.h index d1b26e0091fa..f9f5df1abf5c 100644 --- a/ydb/public/sdk/cpp/client/ydb_value/fwd.h +++ b/ydb/public/sdk/cpp/client/ydb_value/fwd.h @@ -8,7 +8,6 @@ class TTypeBuilder; class TValue; class TValueParser; class TValueBuilder; -class TArenaAllocatedValueBuilder; template class TValueBuilderBase; diff --git a/ydb/public/sdk/cpp/include/ydb-cpp-sdk/client/params/params.h b/ydb/public/sdk/cpp/include/ydb-cpp-sdk/client/params/params.h index 31a0d904a821..71749a9c1adb 100644 --- a/ydb/public/sdk/cpp/include/ydb-cpp-sdk/client/params/params.h +++ b/ydb/public/sdk/cpp/include/ydb-cpp-sdk/client/params/params.h @@ -60,7 +60,7 @@ class TParams { std::shared_ptr Impl_; }; -class TParamValueBuilder : public TValueBuilderBase { +class TParamValueBuilder : public TValueBuilderBase { friend class TParamsBuilder; public: TParamsBuilder& Build(); diff --git a/ydb/public/sdk/cpp/include/ydb-cpp-sdk/client/value/fwd.h b/ydb/public/sdk/cpp/include/ydb-cpp-sdk/client/value/fwd.h index 24a6913987b7..06eb71ac0703 100644 --- a/ydb/public/sdk/cpp/include/ydb-cpp-sdk/client/value/fwd.h +++ b/ydb/public/sdk/cpp/include/ydb-cpp-sdk/client/value/fwd.h @@ -8,9 +8,8 @@ class TTypeBuilder; class TValue; class TValueParser; class TValueBuilder; -class TArenaAllocatedValueBuilder; -template +template class TValueBuilderBase; } // namespace NYdb diff --git a/ydb/public/sdk/cpp/include/ydb-cpp-sdk/client/value/value.h b/ydb/public/sdk/cpp/include/ydb-cpp-sdk/client/value/value.h index f863d4bd81e9..89607a5d5db4 100644 --- a/ydb/public/sdk/cpp/include/ydb-cpp-sdk/client/value/value.h +++ b/ydb/public/sdk/cpp/include/ydb-cpp-sdk/client/value/value.h @@ -174,7 +174,6 @@ std::string FormatType(const TType& type); //! To create complex type, corresponding scope should be opened by Begin*/End* calls //! To create complex repeated type, Add* should be called at least once class TTypeBuilder : public TMoveOnly { - template friend class TValueBuilderImpl; public: TTypeBuilder(TTypeBuilder&&); @@ -288,6 +287,7 @@ class TValue { std::shared_ptr Impl_; }; + class TArenaAllocatedValue { public: TArenaAllocatedValue(const TType& type, Ydb::Value* value); @@ -306,6 +306,8 @@ class TArenaAllocatedValue { }; + + class TValueParser : public TMoveOnly { friend class TResultSetParser; public: @@ -425,59 +427,10 @@ class TValueParser : public TMoveOnly { std::unique_ptr Impl_; }; -template class TValueBuilderImpl; -class StackAllocatedValueHolder { -public: - Ydb::Value& Value() { - return ProtoValue_; - } - - Ydb::Value* ValuePtr() { - return &ProtoValue_; - } - -private: - Ydb::Value ProtoValue_; -}; - -class ArenaAllocatedValueHolder { -public: - ArenaAllocatedValueHolder(google::protobuf::Arena* arena) - // value is created lazily on first access - : ArenaAllocatedValue_(nullptr) - , Arena_(arena) { - Y_ASSERT(arena != nullptr); - } - - Ydb::Value& Value() { - createValueIfAbsent(); - return *ArenaAllocatedValue_; - } - - Ydb::Value* ValuePtr() { - createValueIfAbsent(); - return ArenaAllocatedValue_; - } - -private: - void createValueIfAbsent() { - if (ArenaAllocatedValue_ == nullptr) { - ArenaAllocatedValue_ = google::protobuf::Arena::CreateMessage(Arena_); - } - } - -private: - Ydb::Value* ArenaAllocatedValue_; - google::protobuf::Arena* Arena_; -}; - - - - -template +template class TValueBuilderBase : public TMoveOnly { friend TDerived; public: @@ -590,21 +543,21 @@ class TValueBuilderBase : public TMoveOnly { protected: TValueBuilderBase(TValueBuilderBase&&); - TValueBuilderBase(ValueHolder holder); + TValueBuilderBase(); - TValueBuilderBase(ValueHolder holder, const TType& type); + TValueBuilderBase(const TType& type); - TValueBuilderBase(ValueHolder holder, Ydb::Type& type, Ydb::Value& value); + TValueBuilderBase(Ydb::Type& type, Ydb::Value& value); ~TValueBuilderBase(); void CheckValue(); private: - std::unique_ptr> Impl_; + std::unique_ptr Impl_; }; -class TValueBuilder : public TValueBuilderBase { +class TValueBuilder : public TValueBuilderBase { public: TValueBuilder(); @@ -613,13 +566,6 @@ class TValueBuilder : public TValueBuilderBase { -public: - TArenaAllocatedValueBuilder(google::protobuf::Arena* arena); - - TArenaAllocatedValue BuildArenaAllocatedValue(); -}; - } // namespace NYdb template<> diff --git a/ydb/public/sdk/cpp/src/client/params/params.cpp b/ydb/public/sdk/cpp/src/client/params/params.cpp index 87aec4f6e38a..25cb1a71c93c 100644 --- a/ydb/public/sdk/cpp/src/client/params/params.cpp +++ b/ydb/public/sdk/cpp/src/client/params/params.cpp @@ -130,7 +130,7 @@ class TParamsBuilder::TImpl { //////////////////////////////////////////////////////////////////////////////// TParamValueBuilder::TParamValueBuilder(TParamsBuilder& owner, Ydb::Type& typeProto, Ydb::Value& valueProto) - : TValueBuilderBase(StackAllocatedValueHolder(), typeProto, valueProto) + : TValueBuilderBase(typeProto, valueProto) , Owner_(owner) , Finished_(false) {} diff --git a/ydb/public/sdk/cpp/src/client/value/value.cpp b/ydb/public/sdk/cpp/src/client/value/value.cpp index d3fa0ab97eb5..8626e4b709de 100644 --- a/ydb/public/sdk/cpp/src/client/value/value.cpp +++ b/ydb/public/sdk/cpp/src/client/value/value.cpp @@ -2066,8 +2066,6 @@ void TValueParser::CloseTagged() { //////////////////////////////////////////////////////////////////////////////// -// TODO: if TValueArenaBuilderImpl implemented, we can avoid copying Ydb::Value in TCsvParser::BuildListOnArena when FieldToValue is called -template class TValueBuilderImpl { using ETypeKind = TTypeParser::ETypeKind; using TMembersMap = std::map; @@ -2091,24 +2089,21 @@ class TValueBuilderImpl { }; public: - TValueBuilderImpl(ValueHolder holder) + TValueBuilderImpl() : TypeBuilder_() - , ValueHolder_(std::move(holder)) { - PushPath(ValueHolder_.Value()); + PushPath(ProtoValue_); } - TValueBuilderImpl(ValueHolder holder, const TType& type) + TValueBuilderImpl(const TType& type) : TypeBuilder_() - , ValueHolder_(std::move(holder)) { - PushPath(ValueHolder_.Value()); + PushPath(ProtoValue_); GetType().CopyFrom(type.GetProto()); } - TValueBuilderImpl(ValueHolder holder, Ydb::Type& type, Ydb::Value& value) + TValueBuilderImpl(Ydb::Type& type, Ydb::Value& value) : TypeBuilder_(type) - , ValueHolder_(std::move(holder)) { PushPath(value); } @@ -2120,20 +2115,15 @@ class TValueBuilderImpl { } } - TValue BuildStackAllocatedValue() { + TValue BuildValue() { CheckValue(); Ydb::Value value; - value.Swap(&ValueHolder_.Value()); + value.Swap(&ProtoValue_); return TValue(TypeBuilder_.Build(), std::move(value)); } - TArenaAllocatedValue BuildArenaAllocatedValue() { - CheckValue(); - return TArenaAllocatedValue(TypeBuilder_.Build(), ValueHolder_.ValuePtr()); - } - void Bool(bool value) { FillPrimitiveType(EPrimitiveType::Bool); GetValue().set_bool_value(value); @@ -2843,8 +2833,7 @@ class TValueBuilderImpl { private: //TTypeBuilder TypeBuilder_; TTypeBuilder::TImpl TypeBuilder_; - // Ydb::Value ProtoValue_; - ValueHolder ValueHolder_; + Ydb::Value ProtoValue_; std::map StructsMap_; TStackVec Path_; @@ -2853,211 +2842,211 @@ class TValueBuilderImpl { //////////////////////////////////////////////////////////////////////////////// -template -TValueBuilderBase::TValueBuilderBase(TValueBuilderBase&&) = default; +template +TValueBuilderBase::TValueBuilderBase(TValueBuilderBase&&) = default; -template -TValueBuilderBase::~TValueBuilderBase() = default; +template +TValueBuilderBase::~TValueBuilderBase() = default; -template -TValueBuilderBase::TValueBuilderBase(ValueHolder holder) - : Impl_(new TValueBuilderImpl(std::move(holder))) {} +template +TValueBuilderBase::TValueBuilderBase() + : Impl_(new TValueBuilderImpl()) {} -template -TValueBuilderBase::TValueBuilderBase(ValueHolder holder, const TType& type) - : Impl_(new TValueBuilderImpl(std::move(holder), type)) {} +template +TValueBuilderBase::TValueBuilderBase(const TType& type) + : Impl_(new TValueBuilderImpl(type)) {} -template -TValueBuilderBase::TValueBuilderBase(ValueHolder holder, Ydb::Type& type, Ydb::Value& value) - : Impl_(new TValueBuilderImpl(std::move(holder), type, value)) {} +template +TValueBuilderBase::TValueBuilderBase(Ydb::Type& type, Ydb::Value& value) + : Impl_(new TValueBuilderImpl(type, value)) {} -template -void TValueBuilderBase::CheckValue() { +template +void TValueBuilderBase::CheckValue() { return Impl_->CheckValue(); } -template -TDerived& TValueBuilderBase::Bool(bool value) { +template +TDerived& TValueBuilderBase::Bool(bool value) { Impl_->Bool(value); return static_cast(*this); } -template -TDerived& TValueBuilderBase::Int8(i8 value) { +template +TDerived& TValueBuilderBase::Int8(i8 value) { Impl_->Int8(value); return static_cast(*this); } -template -TDerived& TValueBuilderBase::Uint8(ui8 value) { +template +TDerived& TValueBuilderBase::Uint8(ui8 value) { Impl_->Uint8(value); return static_cast(*this); } -template -TDerived& TValueBuilderBase::Int16(i16 value) { +template +TDerived& TValueBuilderBase::Int16(i16 value) { Impl_->Int16(value); return static_cast(*this); } -template -TDerived& TValueBuilderBase::Uint16(ui16 value) { +template +TDerived& TValueBuilderBase::Uint16(ui16 value) { Impl_->Uint16(value); return static_cast(*this); } -template -TDerived& TValueBuilderBase::Int32(i32 value) { +template +TDerived& TValueBuilderBase::Int32(i32 value) { Impl_->Int32(value); return static_cast(*this); } -template -TDerived& TValueBuilderBase::Uint32(ui32 value) { +template +TDerived& TValueBuilderBase::Uint32(ui32 value) { Impl_->Uint32(value); return static_cast(*this); } -template -TDerived& TValueBuilderBase::Int64(int64_t value) { +template +TDerived& TValueBuilderBase::Int64(int64_t value) { Impl_->Int64(value); return static_cast(*this); } -template -TDerived& TValueBuilderBase::Uint64(uint64_t value) { +template +TDerived& TValueBuilderBase::Uint64(uint64_t value) { Impl_->Uint64(value); return static_cast(*this); } -template -TDerived& TValueBuilderBase::Float(float value) { +template +TDerived& TValueBuilderBase::Float(float value) { Impl_->Float(value); return static_cast(*this); } -template -TDerived& TValueBuilderBase::Double(double value) { +template +TDerived& TValueBuilderBase::Double(double value) { Impl_->Double(value); return static_cast(*this); } -template -TDerived& TValueBuilderBase::Date(const TInstant& value) { +template +TDerived& TValueBuilderBase::Date(const TInstant& value) { Impl_->Date(value); return static_cast(*this); } -template -TDerived& TValueBuilderBase::Datetime(const TInstant& value) { +template +TDerived& TValueBuilderBase::Datetime(const TInstant& value) { Impl_->Datetime(value); return static_cast(*this); } -template -TDerived& TValueBuilderBase::Timestamp(const TInstant& value) { +template +TDerived& TValueBuilderBase::Timestamp(const TInstant& value) { Impl_->Timestamp(value); return static_cast(*this); } -template -TDerived& TValueBuilderBase::Interval(int64_t value) { +template +TDerived& TValueBuilderBase::Interval(int64_t value) { Impl_->Interval(value); return static_cast(*this); } -template -TDerived& TValueBuilderBase::Date32(const i32 value) { +template +TDerived& TValueBuilderBase::Date32(const i32 value) { Impl_->Date32(value); return static_cast(*this); } -template -TDerived& TValueBuilderBase::Datetime64(const int64_t value) { +template +TDerived& TValueBuilderBase::Datetime64(const int64_t value) { Impl_->Datetime64(value); return static_cast(*this); } -template -TDerived& TValueBuilderBase::Timestamp64(const int64_t value) { +template +TDerived& TValueBuilderBase::Timestamp64(const int64_t value) { Impl_->Timestamp64(value); return static_cast(*this); } -template -TDerived& TValueBuilderBase::Interval64(const int64_t value) { +template +TDerived& TValueBuilderBase::Interval64(const int64_t value) { Impl_->Interval64(value); return static_cast(*this); } -template -TDerived& TValueBuilderBase::TzDate(const std::string& value) { +template +TDerived& TValueBuilderBase::TzDate(const std::string& value) { Impl_->TzDate(value); return static_cast(*this); } -template -TDerived& TValueBuilderBase::TzDatetime(const std::string& value) { +template +TDerived& TValueBuilderBase::TzDatetime(const std::string& value) { Impl_->TzDatetime(value); return static_cast(*this); } -template -TDerived& TValueBuilderBase::TzTimestamp(const std::string& value) { +template +TDerived& TValueBuilderBase::TzTimestamp(const std::string& value) { Impl_->TzTimestamp(value); return static_cast(*this); } -template -TDerived& TValueBuilderBase::String(const std::string& value) { +template +TDerived& TValueBuilderBase::String(const std::string& value) { Impl_->String(value); return static_cast(*this); } -template -TDerived& TValueBuilderBase::Utf8(const std::string& value) { +template +TDerived& TValueBuilderBase::Utf8(const std::string& value) { Impl_->Utf8(value); return static_cast(*this); } -template -TDerived& TValueBuilderBase::Yson(const std::string& value) { +template +TDerived& TValueBuilderBase::Yson(const std::string& value) { Impl_->Yson(value); return static_cast(*this); } -template -TDerived& TValueBuilderBase::Json(const std::string& value) { +template +TDerived& TValueBuilderBase::Json(const std::string& value) { Impl_->Json(value); return static_cast(*this); } -template -TDerived& TValueBuilderBase::Uuid(const TUuidValue& value) { +template +TDerived& TValueBuilderBase::Uuid(const TUuidValue& value) { Impl_->Uuid(value); return static_cast(*this); } -template -TDerived& TValueBuilderBase::JsonDocument(const std::string& value) { +template +TDerived& TValueBuilderBase::JsonDocument(const std::string& value) { Impl_->JsonDocument(value); return static_cast(*this); } -template -TDerived& TValueBuilderBase::DyNumber(const std::string& value) { +template +TDerived& TValueBuilderBase::DyNumber(const std::string& value) { Impl_->DyNumber(value); return static_cast(*this); } -template -TDerived& TValueBuilderBase::Decimal(const TDecimalValue& value) { +template +TDerived& TValueBuilderBase::Decimal(const TDecimalValue& value) { Impl_->Decimal(value); return static_cast(*this); } -template -TDerived& TValueBuilderBase::Pg(const TPgValue& value) { +template +TDerived& TValueBuilderBase::Pg(const TPgValue& value) { Impl_->Pg(value); return static_cast(*this); } @@ -3078,369 +3067,359 @@ TDerived& TValueBuilderBase::Pg(const TPgValue& value) { Impl_->EndOptional(); \ return static_cast(*this); -template -TDerived& TValueBuilderBase::OptionalBool(const std::optional& value) { +template +TDerived& TValueBuilderBase::OptionalBool(const std::optional& value) { SET_OPT_VALUE_MAYBE(Bool); } -template -TDerived& TValueBuilderBase::OptionalInt8(const std::optional& value) { +template +TDerived& TValueBuilderBase::OptionalInt8(const std::optional& value) { SET_OPT_VALUE_MAYBE(Int8); } -template -TDerived& TValueBuilderBase::OptionalUint8(const std::optional& value) { +template +TDerived& TValueBuilderBase::OptionalUint8(const std::optional& value) { SET_OPT_VALUE_MAYBE(Uint8); } -template -TDerived& TValueBuilderBase::OptionalInt16(const std::optional& value) { +template +TDerived& TValueBuilderBase::OptionalInt16(const std::optional& value) { SET_OPT_VALUE_MAYBE(Int16); } -template -TDerived& TValueBuilderBase::OptionalUint16(const std::optional& value) { +template +TDerived& TValueBuilderBase::OptionalUint16(const std::optional& value) { SET_OPT_VALUE_MAYBE(Uint16); } -template -TDerived& TValueBuilderBase::OptionalInt32(const std::optional& value) { +template +TDerived& TValueBuilderBase::OptionalInt32(const std::optional& value) { SET_OPT_VALUE_MAYBE(Int32); } -template -TDerived& TValueBuilderBase::OptionalUint32(const std::optional& value) { +template +TDerived& TValueBuilderBase::OptionalUint32(const std::optional& value) { SET_OPT_VALUE_MAYBE(Uint32); } -template -TDerived& TValueBuilderBase::OptionalInt64(const std::optional& value) { +template +TDerived& TValueBuilderBase::OptionalInt64(const std::optional& value) { SET_OPT_VALUE_MAYBE(Int64); } -template -TDerived& TValueBuilderBase::OptionalUint64(const std::optional& value) { +template +TDerived& TValueBuilderBase::OptionalUint64(const std::optional& value) { SET_OPT_VALUE_MAYBE(Uint64); } -template -TDerived& TValueBuilderBase::OptionalFloat(const std::optional& value) { +template +TDerived& TValueBuilderBase::OptionalFloat(const std::optional& value) { SET_OPT_VALUE_MAYBE(Float); } -template -TDerived& TValueBuilderBase::OptionalDouble(const std::optional& value) { +template +TDerived& TValueBuilderBase::OptionalDouble(const std::optional& value) { SET_OPT_VALUE_MAYBE(Double); } -template -TDerived& TValueBuilderBase::OptionalDate(const std::optional& value) { +template +TDerived& TValueBuilderBase::OptionalDate(const std::optional& value) { SET_OPT_VALUE_MAYBE(Date); } -template -TDerived& TValueBuilderBase::OptionalDatetime(const std::optional& value) { +template +TDerived& TValueBuilderBase::OptionalDatetime(const std::optional& value) { SET_OPT_VALUE_MAYBE(Datetime); } -template -TDerived& TValueBuilderBase::OptionalTimestamp(const std::optional& value) { +template +TDerived& TValueBuilderBase::OptionalTimestamp(const std::optional& value) { SET_OPT_VALUE_MAYBE(Timestamp); } -template -TDerived& TValueBuilderBase::OptionalInterval(const std::optional& value) { +template +TDerived& TValueBuilderBase::OptionalInterval(const std::optional& value) { SET_OPT_VALUE_MAYBE(Interval); } -template -TDerived& TValueBuilderBase::OptionalDate32(const std::optional& value) { +template +TDerived& TValueBuilderBase::OptionalDate32(const std::optional& value) { SET_OPT_VALUE_MAYBE(Date32); } -template -TDerived& TValueBuilderBase::OptionalDatetime64(const std::optional& value) { +template +TDerived& TValueBuilderBase::OptionalDatetime64(const std::optional& value) { SET_OPT_VALUE_MAYBE(Datetime64); } -template -TDerived& TValueBuilderBase::OptionalTimestamp64(const std::optional& value) { +template +TDerived& TValueBuilderBase::OptionalTimestamp64(const std::optional& value) { SET_OPT_VALUE_MAYBE(Timestamp64); } -template -TDerived& TValueBuilderBase::OptionalInterval64(const std::optional& value) { +template +TDerived& TValueBuilderBase::OptionalInterval64(const std::optional& value) { SET_OPT_VALUE_MAYBE(Interval64); } -template -TDerived& TValueBuilderBase::OptionalTzDate(const std::optional& value) { +template +TDerived& TValueBuilderBase::OptionalTzDate(const std::optional& value) { SET_OPT_VALUE_MAYBE(TzDate); } -template -TDerived& TValueBuilderBase::OptionalTzDatetime(const std::optional& value) { +template +TDerived& TValueBuilderBase::OptionalTzDatetime(const std::optional& value) { SET_OPT_VALUE_MAYBE(TzDatetime); } -template -TDerived& TValueBuilderBase::OptionalTzTimestamp(const std::optional& value) { +template +TDerived& TValueBuilderBase::OptionalTzTimestamp(const std::optional& value) { SET_OPT_VALUE_MAYBE(TzTimestamp); } -template -TDerived& TValueBuilderBase::OptionalString(const std::optional& value) { +template +TDerived& TValueBuilderBase::OptionalString(const std::optional& value) { SET_OPT_VALUE_MAYBE(String); } -template -TDerived& TValueBuilderBase::OptionalUtf8(const std::optional& value) { +template +TDerived& TValueBuilderBase::OptionalUtf8(const std::optional& value) { SET_OPT_VALUE_MAYBE(Utf8); } -template -TDerived& TValueBuilderBase::OptionalYson(const std::optional& value) { +template +TDerived& TValueBuilderBase::OptionalYson(const std::optional& value) { SET_OPT_VALUE_MAYBE(Yson); } -template -TDerived& TValueBuilderBase::OptionalJson(const std::optional& value) { +template +TDerived& TValueBuilderBase::OptionalJson(const std::optional& value) { SET_OPT_VALUE_MAYBE(Json); } -template -TDerived& TValueBuilderBase::OptionalUuid(const std::optional& value) { +template +TDerived& TValueBuilderBase::OptionalUuid(const std::optional& value) { SET_OPT_VALUE_MAYBE(Uuid); } -template -TDerived& TValueBuilderBase::OptionalJsonDocument(const std::optional& value) { +template +TDerived& TValueBuilderBase::OptionalJsonDocument(const std::optional& value) { SET_OPT_VALUE_MAYBE(JsonDocument); } -template -TDerived& TValueBuilderBase::OptionalDyNumber(const std::optional& value) { +template +TDerived& TValueBuilderBase::OptionalDyNumber(const std::optional& value) { SET_OPT_VALUE_MAYBE(DyNumber); } -template -TDerived& TValueBuilderBase::BeginOptional() { +template +TDerived& TValueBuilderBase::BeginOptional() { Impl_->BeginOptional(); return static_cast(*this); } -template -TDerived& TValueBuilderBase::EndOptional() { +template +TDerived& TValueBuilderBase::EndOptional() { Impl_->EndOptional(); return static_cast(*this); } -template -TDerived& TValueBuilderBase::EmptyOptional(const TType& itemType) { +template +TDerived& TValueBuilderBase::EmptyOptional(const TType& itemType) { Impl_->EmptyOptional(itemType); return static_cast(*this); } -template -TDerived& TValueBuilderBase::EmptyOptional(EPrimitiveType itemType) { +template +TDerived& TValueBuilderBase::EmptyOptional(EPrimitiveType itemType) { Impl_->EmptyOptional(itemType); return static_cast(*this); } -template -TDerived& TValueBuilderBase::EmptyOptional() { +template +TDerived& TValueBuilderBase::EmptyOptional() { Impl_->EmptyOptional(); return static_cast(*this); } -template -TDerived& TValueBuilderBase::BeginList() { +template +TDerived& TValueBuilderBase::BeginList() { Impl_->BeginList(); return static_cast(*this); } -template -TDerived& TValueBuilderBase::EndList() { +template +TDerived& TValueBuilderBase::EndList() { Impl_->EndList(); return static_cast(*this); } -template -TDerived& TValueBuilderBase::AddListItem() { +template +TDerived& TValueBuilderBase::AddListItem() { Impl_->AddListItem(); return static_cast(*this); } -template -TDerived& TValueBuilderBase::AddListItem(const TValue& itemValue) { +template +TDerived& TValueBuilderBase::AddListItem(const TValue& itemValue) { Impl_->AddListItem(itemValue); return static_cast(*this); } -template -TDerived& TValueBuilderBase::AddListItem(TValue&& itemValue) { +template +TDerived& TValueBuilderBase::AddListItem(TValue&& itemValue) { Impl_->AddListItem(std::move(itemValue)); return static_cast(*this); } -template -TDerived& TValueBuilderBase::EmptyList(const TType& itemType) { +template +TDerived& TValueBuilderBase::EmptyList(const TType& itemType) { Impl_->EmptyList(itemType); return static_cast(*this); } -template -TDerived& TValueBuilderBase::EmptyList() { +template +TDerived& TValueBuilderBase::EmptyList() { Impl_->EmptyList(); return static_cast(*this); } -template -TDerived& TValueBuilderBase::BeginStruct() { +template +TDerived& TValueBuilderBase::BeginStruct() { Impl_->BeginStruct(); return static_cast(*this); } -template -TDerived& TValueBuilderBase::AddMember(const std::string& memberName) { +template +TDerived& TValueBuilderBase::AddMember(const std::string& memberName) { Impl_->AddMember(memberName); return static_cast(*this); } -template -TDerived& TValueBuilderBase::AddMember(const std::string& memberName, const TValue& memberValue) { +template +TDerived& TValueBuilderBase::AddMember(const std::string& memberName, const TValue& memberValue) { Impl_->AddMember(memberName, memberValue); return static_cast(*this); } -template -TDerived& TValueBuilderBase::AddMember(const std::string& memberName, TValue&& memberValue) { +template +TDerived& TValueBuilderBase::AddMember(const std::string& memberName, TValue&& memberValue) { Impl_->AddMember(memberName, std::move(memberValue)); return static_cast(*this); } -template -TDerived& TValueBuilderBase::EndStruct() { +template +TDerived& TValueBuilderBase::EndStruct() { Impl_->EndStruct(); return static_cast(*this); } -template -TDerived& TValueBuilderBase::BeginTuple() { +template +TDerived& TValueBuilderBase::BeginTuple() { Impl_->BeginTuple(); return static_cast(*this); } -template -TDerived& TValueBuilderBase::AddElement() { +template +TDerived& TValueBuilderBase::AddElement() { Impl_->AddElement(); return static_cast(*this); } -template -TDerived& TValueBuilderBase::AddElement(const TValue& elementValue) { +template +TDerived& TValueBuilderBase::AddElement(const TValue& elementValue) { Impl_->AddElement(elementValue); return static_cast(*this); } -template -TDerived& TValueBuilderBase::EndTuple() { +template +TDerived& TValueBuilderBase::EndTuple() { Impl_->EndTuple(); return static_cast(*this); } -template -TDerived& TValueBuilderBase::BeginDict() { +template +TDerived& TValueBuilderBase::BeginDict() { Impl_->BeginDict(); return static_cast(*this); } -template -TDerived& TValueBuilderBase::AddDictItem() { +template +TDerived& TValueBuilderBase::AddDictItem() { Impl_->AddDictItem(); return static_cast(*this); } -template -TDerived& TValueBuilderBase::DictKey() { +template +TDerived& TValueBuilderBase::DictKey() { Impl_->DictKey(); return static_cast(*this); } -template -TDerived& TValueBuilderBase::DictKey(const TValue& keyValue) { +template +TDerived& TValueBuilderBase::DictKey(const TValue& keyValue) { Impl_->DictKey(keyValue); return static_cast(*this); } -template -TDerived& TValueBuilderBase::DictPayload() { +template +TDerived& TValueBuilderBase::DictPayload() { Impl_->DictPayload(); return static_cast(*this); } -template -TDerived& TValueBuilderBase::DictPayload(const TValue& payloadValue) { +template +TDerived& TValueBuilderBase::DictPayload(const TValue& payloadValue) { Impl_->DictPayload(payloadValue); return static_cast(*this); } -template -TDerived& TValueBuilderBase::EndDict() { +template +TDerived& TValueBuilderBase::EndDict() { Impl_->EndDict(); return static_cast(*this); } -template -TDerived& TValueBuilderBase::EmptyDict(const TType& keyType, const TType& payloadType) { +template +TDerived& TValueBuilderBase::EmptyDict(const TType& keyType, const TType& payloadType) { Impl_->EmptyDict(keyType, payloadType); return static_cast(*this); } -template -TDerived& TValueBuilderBase::EmptyDict() { +template +TDerived& TValueBuilderBase::EmptyDict() { Impl_->EmptyDict(); return static_cast(*this); } -template -TDerived& TValueBuilderBase::BeginTagged(const std::string& tag) { +template +TDerived& TValueBuilderBase::BeginTagged(const std::string& tag) { Impl_->BeginTagged(tag); return static_cast(*this); } -template -TDerived& TValueBuilderBase::EndTagged() { +template +TDerived& TValueBuilderBase::EndTagged() { Impl_->EndTagged(); return static_cast(*this); } //////////////////////////////////////////////////////////////////////////////// -template class TValueBuilderBase; -template class TValueBuilderBase; -template class TValueBuilderBase; +template class TValueBuilderBase; +template class TValueBuilderBase; //////////////////////////////////////////////////////////////////////////////// TValueBuilder::TValueBuilder() - : TValueBuilderBase(StackAllocatedValueHolder()) {} + : TValueBuilderBase() {} TValueBuilder::TValueBuilder(const TType& type) - : TValueBuilderBase(StackAllocatedValueHolder(), type) {} + : TValueBuilderBase(type) {} TValue TValueBuilder::Build() { - return Impl_->BuildStackAllocatedValue(); -} - -//////////////////////////////////////////////////////////////////////////////// - -TArenaAllocatedValueBuilder::TArenaAllocatedValueBuilder(google::protobuf::Arena* arena) - : TValueBuilderBase(ArenaAllocatedValueHolder(arena)) {} - -TArenaAllocatedValue TArenaAllocatedValueBuilder::BuildArenaAllocatedValue() { - return Impl_->BuildArenaAllocatedValue(); + return Impl_->BuildValue(); } } // namespace NYdb From 205bfb458aabeead538668a716e4871762e49c9a Mon Sep 17 00:00:00 2001 From: Vladislav Artiukhov Date: Sun, 8 Jun 2025 12:29:16 +0000 Subject: [PATCH 20/35] refactor: remove commented code --- ydb/public/lib/ydb_cli/common/csv_parser.cpp | 2 -- ydb/public/lib/ydb_cli/import/import.cpp | 13 ------------- 2 files changed, 15 deletions(-) diff --git a/ydb/public/lib/ydb_cli/common/csv_parser.cpp b/ydb/public/lib/ydb_cli/common/csv_parser.cpp index 2ad122ced30f..ca5c578dd49a 100644 --- a/ydb/public/lib/ydb_cli/common/csv_parser.cpp +++ b/ydb/public/lib/ydb_cli/common/csv_parser.cpp @@ -612,8 +612,6 @@ TArenaAllocatedValue TCsvParser::BuildListOnArena( return TArenaAllocatedValue(ResultListType.value(), value); } -std::atomic_bool print = true; - // Helper method to process a single CSV line void TCsvParser::ProcessCsvLine( const TString& line, diff --git a/ydb/public/lib/ydb_cli/import/import.cpp b/ydb/public/lib/ydb_cli/import/import.cpp index 7414b1fe4edc..dcfd28efe43e 100644 --- a/ydb/public/lib/ydb_cli/import/import.cpp +++ b/ydb/public/lib/ydb_cli/import/import.cpp @@ -1142,18 +1142,6 @@ TStatus TImportFileClient::TImpl::UpsertCsv(IInputStream& input, }; auto upsertCsvFunc = [&](std::vector&& buffer, ui64 row, std::shared_ptr batchStatus) { - // auto buildFunc = [&, buffer = std::move(buffer), row, this] () mutable { - // try { - // return parser.BuildList(buffer, filePath, row); - // } catch (const std::exception& e) { - // if (!Failed.exchange(true)) { - // ErrorStatus = MakeHolder(MakeStatus(EStatus::INTERNAL_ERROR, e.what())); - // } - // jobInflightManager->ReleaseJob(); - // throw; - // } - // }; - auto buildOnArenaFunc = [&, buffer = std::move(buffer), row, this] (google::protobuf::Arena* arena) mutable { try { return parser.BuildListOnArena(buffer, filePath, arena, row); @@ -1167,7 +1155,6 @@ TStatus TImportFileClient::TImpl::UpsertCsv(IInputStream& input, }; // TODO: create arena here and pass it to UpsertTValueBufferOnArena? - // UpsertTValueBuffer(dbPath, std::move(buildFunc)) UpsertTValueBufferOnArena(dbPath, std::move(buildOnArenaFunc)) .Apply([&, batchStatus](const TAsyncStatus& asyncStatus) { jobInflightManager->ReleaseJob(); From d3961f2ad9091154aed921382b58e9f935a087ab Mon Sep 17 00:00:00 2001 From: Vladislav Artiukhov Date: Fri, 28 Mar 2025 19:34:01 +0000 Subject: [PATCH 21/35] feat(TLinesSplitter::ConsumeLine): replace bool negation with quote chars counting Benchmarking results (see commit 62018144348 for details): 1. Previous solution (at commit 62018144348): Elapsed: 17.7 sec. Total read size: 8.33GiB. Average processing speed: 482MiB/s. 2. Current solution: Elapsed: 10.6 sec. Total read size: 8.33GiB. Average processing speed: 804MiB/s (up to 817MiB/s). Cherry-picked the commit: `773a3a5` See: https://github.com/Vladislav0Art/ydb/commit/773a3a5a860d398bba34bac548aaf59e87b4c983 --- library/cpp/string_utils/csv/csv.cpp | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/library/cpp/string_utils/csv/csv.cpp b/library/cpp/string_utils/csv/csv.cpp index fd3d932fcd5c..823c96d6904a 100644 --- a/library/cpp/string_utils/csv/csv.cpp +++ b/library/cpp/string_utils/csv/csv.cpp @@ -1,3 +1,5 @@ +#include + #include "csv.h" TStringBuf NCsvFormat::CsvSplitter::Consume() { @@ -70,11 +72,12 @@ TString NCsvFormat::TLinesSplitter::ConsumeLine() { TString result; TString line; while (Input.ReadLine(line)) { - for (auto it = line.begin(); it != line.end(); ++it) { - if (*it == Quote) { - Escape = !Escape; - } + const size_t quoteCount = std::count(line.cbegin(), line.cend(), Quote); + // TODO: use `& 1`? + if (quoteCount % 2 == 1) { + Escape = !Escape; } + if (!result) { result = line; } else { From 67ca3186d46f3fde27092e35e57ca4becc143213 Mon Sep 17 00:00:00 2001 From: Vladislav Artiukhov Date: Sun, 8 Jun 2025 14:16:53 +0000 Subject: [PATCH 22/35] feat(TCsvParser): add method TCsvParser::GetHeaderRow() --- ydb/public/lib/ydb_cli/common/csv_parser.cpp | 4 ++++ ydb/public/lib/ydb_cli/common/csv_parser.h | 1 + 2 files changed, 5 insertions(+) diff --git a/ydb/public/lib/ydb_cli/common/csv_parser.cpp b/ydb/public/lib/ydb_cli/common/csv_parser.cpp index ca5c578dd49a..460629a2af2c 100644 --- a/ydb/public/lib/ydb_cli/common/csv_parser.cpp +++ b/ydb/public/lib/ydb_cli/common/csv_parser.cpp @@ -812,6 +812,10 @@ const TVector& TCsvParser::GetHeader() { return Header; } +const TString& TCsvParser::GetHeaderRow() const { + return HeaderRow; +} + } } diff --git a/ydb/public/lib/ydb_cli/common/csv_parser.h b/ydb/public/lib/ydb_cli/common/csv_parser.h index b3a0ec89bcc3..faf9d5cb11f3 100644 --- a/ydb/public/lib/ydb_cli/common/csv_parser.h +++ b/ydb/public/lib/ydb_cli/common/csv_parser.h @@ -81,6 +81,7 @@ class TCsvParser { void BuildLineType(); const TVector& GetHeader(); + const TString& GetHeaderRow() const; void ParseLineTypes(TString& line, TPossibleTypes& possibleTypes, const TParseMetadata& meta); private: From 12df05fc8509cad9b2b5360cb977baa4fd4fff02 Mon Sep 17 00:00:00 2001 From: Vladislav Artiukhov Date: Sun, 8 Jun 2025 14:17:43 +0000 Subject: [PATCH 23/35] feat(import): implement upload via Apcahe Arrow with fallback on YDB Types with Protobuf Arena --- ydb/public/lib/ydb_cli/import/import.cpp | 181 ++++++++++++++++++++--- 1 file changed, 161 insertions(+), 20 deletions(-) diff --git a/ydb/public/lib/ydb_cli/import/import.cpp b/ydb/public/lib/ydb_cli/import/import.cpp index dcfd28efe43e..ebb0421929ae 100644 --- a/ydb/public/lib/ydb_cli/import/import.cpp +++ b/ydb/public/lib/ydb_cli/import/import.cpp @@ -27,6 +27,7 @@ #include #include #include +#include #include #include @@ -561,6 +562,12 @@ class TImportFileClient::TImpl { TAsyncStatus UpsertTValueBuffer(const TString& dbPath, TValueBuilder& builder); TAsyncStatus UpsertTValueBuffer(const TString& dbPath, std::function&& buildFunc); + TAsyncStatus UpsertTValueBufferParquet( + const TString& dbPath, + std::shared_ptr batch, + const arrow::ipc::IpcWriteOptions& writeOptions + ); + TAsyncStatus UpsertTValueBufferOnArena( const TString& dbPath, std::function&& buildFunc); @@ -1022,6 +1029,45 @@ TAsyncStatus TImportFileClient::TImpl::UpsertTValueBuffer(const TString& dbPath, } +inline +TAsyncStatus TImportFileClient::TImpl::UpsertTValueBufferParquet( + const TString& dbPath, + std::shared_ptr batch, + const arrow::ipc::IpcWriteOptions& writeOptions +) { + if (!RequestsInflight->try_acquire()) { + if (Settings.Verbose_ && Settings.NewlineDelimited_) { + if (!InformedAboutLimit.exchange(true)) { + Cerr << (TStringBuilder() << "@ (each '@' means max request inflight is reached and a worker thread is waiting for " + "any response from database)" << Endl); + } else { + Cerr << '@'; + } + } + RequestsInflight->acquire(); + } + + auto retryFunc = [parquet = NYdb_cli::NArrow::SerializeBatch(batch, writeOptions), + schema = NYdb_cli::NArrow::SerializeSchema(*batch->schema()), + dbPath](NTable::TTableClient& client) { + return client.BulkUpsert(dbPath, NTable::EDataFormat::ApacheArrow, parquet, schema) + .Apply([](const NTable::TAsyncBulkUpsertResult& result) { + return TStatus(result.GetValueSync()); + }); + }; + + return TableClient->RetryOperation(std::move(retryFunc), RetrySettings) + .Apply([this](const TAsyncStatus& asyncStatus) { + NYdb::TStatus status = asyncStatus.GetValueSync(); + if (!status.IsSuccess()) { + if (!Failed.exchange(true)) { + ErrorStatus = MakeHolder(status); + } + } + RequestsInflight->release(); + return asyncStatus; + }); +} @@ -1051,6 +1097,7 @@ TAsyncStatus TImportFileClient::TImpl::UpsertTValueBufferOnArena( // Running heavy building task on processing pool: return NThreading::Async(std::move(buildTValueAndSendRequest), *ProcessingPool); }; + if (!RequestsInflight->try_acquire()) { if (Settings.Verbose_ && Settings.NewlineDelimited_) { if (!InformedAboutLimit.exchange(true)) { @@ -1062,6 +1109,7 @@ TAsyncStatus TImportFileClient::TImpl::UpsertTValueBufferOnArena( } RequestsInflight->acquire(); } + return TableClient->RetryOperation(std::move(retryFunc), RetrySettings) .Apply([this](const TAsyncStatus& asyncStatus) { NYdb::TStatus status = asyncStatus.GetValueSync(); @@ -1108,6 +1156,11 @@ TStatus TImportFileClient::TImpl::UpsertCsv(IInputStream& input, ui64 skippedBytes = 0; ui64 nextSkipBorder = VerboseModeStepSize; + std::cerr << "Settings.Header_: " << Settings.Header_ << std::endl; + std::cerr << "Settings.SkipRows_: " << Settings.SkipRows_ << std::endl; + std::cerr << "rowsToSkip: " << rowsToSkip << std::endl; + std::cerr << "row: " << row << std::endl; + TString line; std::vector inFlightRequests; std::vector buffer; @@ -1141,32 +1194,120 @@ TStatus TImportFileClient::TImpl::UpsertCsv(IInputStream& input, } }; + // Note: table = dbPath (path to the table on the server) + auto columns = DbTableInfo->GetTableColumns(); + + std::cerr << "columns: " << columns.size() << std::endl; + for (const auto& column : columns) { + std::cerr << "column: " << column.Name << " " << column.Type.ToString() << std::endl; + } + + const Ydb::Formats::CsvSettings csvSettings = ([this]() { + Ydb::Formats::CsvSettings settings; + settings.set_delimiter(Settings.Delimiter_); + settings.set_header(Settings.Header_); + if (Settings.NullValue_.has_value()) { + settings.set_null_value(Settings.NullValue_.value()); + } + settings.set_skip_rows(Settings.SkipRows_); + return settings; + }()); + + auto writeOptions = arrow::ipc::IpcWriteOptions::Defaults(); + // TODO: what codec to use? ZSTD is used in ydb_workload_import.cpp; use it here as well? + constexpr auto codecType = arrow::Compression::type::UNCOMPRESSED; + writeOptions.codec = *arrow::util::Codec::Create(codecType); + + // determines if we should fallback to YDB Types (TValue with Protobuf arena) when Apache Arrow failed. + // Apache Arrow tends to be faster but doesn't support all YDB types (e.g. TzDatetime). On the other hand, + // YDB Types are slower but support all YDB types. + std::atomic_bool fallbackToYDBTypes = false; + + auto upsertCsvFunc = [&](std::vector&& buffer, ui64 row, std::shared_ptr batchStatus) { - auto buildOnArenaFunc = [&, buffer = std::move(buffer), row, this] (google::protobuf::Arena* arena) mutable { - try { - return parser.BuildListOnArena(buffer, filePath, arena, row); - } catch (const std::exception& e) { - if (!Failed.exchange(true)) { - ErrorStatus = MakeHolder(MakeStatus(EStatus::INTERNAL_ERROR, e.what())); + // by default, we attempt to send data via Apache Arrow, if it fails, we fallback to YDB Types (TValue with Protobuf arena) + if (!fallbackToYDBTypes.load()) { + const i64 estimatedCsvLineLength = (!buffer.empty() ? 2 * buffer.front().size() : 10'000); + TStringBuilder data; + data.reserve((buffer.size() + (Settings.Header_ ? 1 : 0)) * estimatedCsvLineLength); + // insert header if it is present in the given csv file + if (Settings.Header_) { + data << parser.GetHeaderRow() << Endl; + } + data << JoinSeq("\n", buffer) << Endl; + + // I: attempt to send data via Apache Arrow + // if header is present, it is expected to be the first line of the data + TString error; + auto arrowCsv = NKikimr::NFormats::TArrowCSVTable::Create(columns, Settings.Header_); + if (arrowCsv.ok()) { + if (auto batch = arrowCsv->ReadSingleBatch(data, csvSettings, error)) { + if (!error) { + // batch was read successfully, sending data via Apache Arrow + UpsertTValueBufferParquet(dbPath, std::move(batch), writeOptions) + .Apply([&, batchStatus](const TAsyncStatus& asyncStatus) { + jobInflightManager->ReleaseJob(); + if (asyncStatus.GetValueSync().IsSuccess()) { + batchStatus->Completed = true; + if (!FileProgressPool->AddFunc(saveProgressIfAny) && !Failed.exchange(true)) { + ErrorStatus = MakeHolder(MakeStatus(EStatus::INTERNAL_ERROR, + "Couldn't add worker func to save progress")); + } + } + return asyncStatus; + }); + } + else { + error = "Error while reading a batch from Apache Arrow: " + error; + fallbackToYDBTypes.store(true); + } + } + else { + error = "Could not read a batch from Apache Arrow"; + fallbackToYDBTypes.store(true); } - jobInflightManager->ReleaseJob(); - throw; } - }; + else { + error = arrowCsv.status().ToString(); + fallbackToYDBTypes.store(true); + } + + // print the error if fallback requested + if (fallbackToYDBTypes.load()) { + std::cerr << "Error when trying to import via Apache Arrow: '" << error << "'.\n" + << "Falling back to import via YDB Types (no action needed from your side)." << std::endl; + } + } - // TODO: create arena here and pass it to UpsertTValueBufferOnArena? - UpsertTValueBufferOnArena(dbPath, std::move(buildOnArenaFunc)) - .Apply([&, batchStatus](const TAsyncStatus& asyncStatus) { - jobInflightManager->ReleaseJob(); - if (asyncStatus.GetValueSync().IsSuccess()) { - batchStatus->Completed = true; - if (!FileProgressPool->AddFunc(saveProgressIfAny) && !Failed.exchange(true)) { - ErrorStatus = MakeHolder(MakeStatus(EStatus::INTERNAL_ERROR, - "Couldn't add worker func to save progress")); + // check if fallback requested (after first Apache Arrow failure, we will always fallback to YDB Types) + if (fallbackToYDBTypes.load()) { + // II: fallback to TValue with Protobuf arena because Apache Arrow failed + auto buildOnArenaFunc = [&, buffer = std::move(buffer), row, this] (google::protobuf::Arena* arena) mutable { + try { + return parser.BuildListOnArena(buffer, filePath, arena, row); + } catch (const std::exception& e) { + if (!Failed.exchange(true)) { + ErrorStatus = MakeHolder(MakeStatus(EStatus::INTERNAL_ERROR, e.what())); } + jobInflightManager->ReleaseJob(); + throw; } - return asyncStatus; - }); + }; + + // TODO: create arena here and pass it to UpsertTValueBufferOnArena? + UpsertTValueBufferOnArena(dbPath, std::move(buildOnArenaFunc)) + .Apply([&, batchStatus](const TAsyncStatus& asyncStatus) { + jobInflightManager->ReleaseJob(); + if (asyncStatus.GetValueSync().IsSuccess()) { + batchStatus->Completed = true; + if (!FileProgressPool->AddFunc(saveProgressIfAny) && !Failed.exchange(true)) { + ErrorStatus = MakeHolder(MakeStatus(EStatus::INTERNAL_ERROR, + "Couldn't add worker func to save progress")); + } + } + return asyncStatus; + }); + } }; for (ui32 i = 0; i < rowsToSkip; ++i) { From 7308e9258a5d897549cdc38f5fa91c0be47122bb Mon Sep 17 00:00:00 2001 From: Vladislav Artiukhov Date: Sun, 8 Jun 2025 14:27:18 +0000 Subject: [PATCH 24/35] refactor: remove newlines and commented code --- .../sdk/cpp/include/ydb-cpp-sdk/client/value/value.h | 4 ---- .../impl/ydb_internal/grpc_connections/grpc_connections.h | 8 -------- ydb/public/sdk/cpp/src/client/table/impl/table_client.cpp | 7 ------- 3 files changed, 19 deletions(-) diff --git a/ydb/public/sdk/cpp/include/ydb-cpp-sdk/client/value/value.h b/ydb/public/sdk/cpp/include/ydb-cpp-sdk/client/value/value.h index 89607a5d5db4..40984ac36247 100644 --- a/ydb/public/sdk/cpp/include/ydb-cpp-sdk/client/value/value.h +++ b/ydb/public/sdk/cpp/include/ydb-cpp-sdk/client/value/value.h @@ -305,9 +305,6 @@ class TArenaAllocatedValue { std::shared_ptr Impl_; }; - - - class TValueParser : public TMoveOnly { friend class TResultSetParser; public: @@ -429,7 +426,6 @@ class TValueParser : public TMoveOnly { class TValueBuilderImpl; - template class TValueBuilderBase : public TMoveOnly { friend TDerived; diff --git a/ydb/public/sdk/cpp/src/client/impl/ydb_internal/grpc_connections/grpc_connections.h b/ydb/public/sdk/cpp/src/client/impl/ydb_internal/grpc_connections/grpc_connections.h index 4a457de8642a..3e4c4c2c8c38 100644 --- a/ydb/public/sdk/cpp/src/client/impl/ydb_internal/grpc_connections/grpc_connections.h +++ b/ydb/public/sdk/cpp/src/client/impl/ydb_internal/grpc_connections/grpc_connections.h @@ -473,8 +473,6 @@ class TGRpcConnectionsImpl std::move(context)); } - - template void RunDeferredOnArena( TRequest* request, @@ -528,9 +526,6 @@ class TGRpcConnectionsImpl std::move(context)); } - - - // Run request using discovery endpoint. // Mostly usefull to make calls from credential provider template @@ -587,9 +582,6 @@ class TGRpcConnectionsImpl context); } - - - template void RunDeferredOnArena( TRequest* request, diff --git a/ydb/public/sdk/cpp/src/client/table/impl/table_client.cpp b/ydb/public/sdk/cpp/src/client/table/impl/table_client.cpp index ffeca4647695..d5dceab74d65 100644 --- a/ydb/public/sdk/cpp/src/client/table/impl/table_client.cpp +++ b/ydb/public/sdk/cpp/src/client/table/impl/table_client.cpp @@ -1023,12 +1023,6 @@ TAsyncBulkUpsertResult TTableClient::TImpl::BulkUpsertUnretryableArenaAllocated( const TBulkUpsertSettings& settings ) { auto request = MakeOperationRequestOnArena(settings, arena); - // std::cout << "request->GetArena(): " << request->GetArena() << "\n" - // << "request->mutable_rows()->GetArena(): " << request->mutable_rows()->GetArena() << "\n" - // << "Value->GetArena(): " << rows.second->GetArena() << "\n" - // << "given arena: " << arena << std::endl; - // assert(request->GetArena() == arena); - // assert(request->mutable_rows()->GetArena() == arena); request->set_table(TStringType{table}); // TODO: Ydb::Type still gets copied because request is arena-allocated and rows' Type is not *request->mutable_rows()->mutable_type() = std::move(rows.GetType()).ExtractProto(); @@ -1058,7 +1052,6 @@ TAsyncBulkUpsertResult TTableClient::TImpl::BulkUpsertUnretryableArenaAllocated( - TAsyncBulkUpsertResult TTableClient::TImpl::BulkUpsert(const std::string& table, TValue&& rows, const TBulkUpsertSettings& settings) { auto request = MakeOperationRequest(settings); request.set_table(TStringType{table}); From 6effb23d52e9110dd867cb965379908c46b1efbc Mon Sep 17 00:00:00 2001 From: Vladislav Artiukhov Date: Fri, 13 Jun 2025 17:15:54 +0000 Subject: [PATCH 25/35] Revert "feat(TLinesSplitter::ConsumeLine): replace bool negation with quote chars counting" This reverts commit d3961f2ad9091154aed921382b58e9f935a087ab. --- library/cpp/string_utils/csv/csv.cpp | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/library/cpp/string_utils/csv/csv.cpp b/library/cpp/string_utils/csv/csv.cpp index 823c96d6904a..fd3d932fcd5c 100644 --- a/library/cpp/string_utils/csv/csv.cpp +++ b/library/cpp/string_utils/csv/csv.cpp @@ -1,5 +1,3 @@ -#include - #include "csv.h" TStringBuf NCsvFormat::CsvSplitter::Consume() { @@ -72,12 +70,11 @@ TString NCsvFormat::TLinesSplitter::ConsumeLine() { TString result; TString line; while (Input.ReadLine(line)) { - const size_t quoteCount = std::count(line.cbegin(), line.cend(), Quote); - // TODO: use `& 1`? - if (quoteCount % 2 == 1) { - Escape = !Escape; + for (auto it = line.begin(); it != line.end(); ++it) { + if (*it == Quote) { + Escape = !Escape; + } } - if (!result) { result = line; } else { From bd2278a68bf0a60d5ff18fc44026ef4d8844a519 Mon Sep 17 00:00:00 2001 From: Vladislav Artiukhov Date: Fri, 13 Jun 2025 17:24:54 +0000 Subject: [PATCH 26/35] refactor: remove redundant includes from value.h --- ydb/public/sdk/cpp/include/ydb-cpp-sdk/client/value/value.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/ydb/public/sdk/cpp/include/ydb-cpp-sdk/client/value/value.h b/ydb/public/sdk/cpp/include/ydb-cpp-sdk/client/value/value.h index 40984ac36247..c4367cad1202 100644 --- a/ydb/public/sdk/cpp/include/ydb-cpp-sdk/client/value/value.h +++ b/ydb/public/sdk/cpp/include/ydb-cpp-sdk/client/value/value.h @@ -2,12 +2,10 @@ #include "fwd.h" -#include #include #include #include -#include namespace Ydb { class Type; From 65d81487a74bf9809a05468483bd4e0bfa1e08fe Mon Sep 17 00:00:00 2001 From: Vladislav Artiukhov Date: Fri, 13 Jun 2025 17:48:13 +0000 Subject: [PATCH 27/35] refactor: remove TArenaAllocatedValue class (use TValue instead) --- ydb/public/lib/ydb_cli/common/csv_parser.cpp | 4 +- ydb/public/lib/ydb_cli/common/csv_parser.h | 2 +- ydb/public/lib/ydb_cli/import/import.cpp | 9 +-- .../include/ydb-cpp-sdk/client/table/table.h | 2 +- .../include/ydb-cpp-sdk/client/value/value.h | 18 +----- .../src/client/table/impl/table_client.cpp | 2 +- .../cpp/src/client/table/impl/table_client.h | 13 +++- ydb/public/sdk/cpp/src/client/table/table.cpp | 5 +- ydb/public/sdk/cpp/src/client/value/value.cpp | 63 ++++++------------- 9 files changed, 45 insertions(+), 73 deletions(-) diff --git a/ydb/public/lib/ydb_cli/common/csv_parser.cpp b/ydb/public/lib/ydb_cli/common/csv_parser.cpp index 460629a2af2c..26beb8688a00 100644 --- a/ydb/public/lib/ydb_cli/common/csv_parser.cpp +++ b/ydb/public/lib/ydb_cli/common/csv_parser.cpp @@ -582,7 +582,7 @@ TValue TCsvParser::BuildList(const std::vector& lines, const TString& f -TArenaAllocatedValue TCsvParser::BuildListOnArena( +TValue TCsvParser::BuildListOnArena( const std::vector& lines, const TString& filename, google::protobuf::Arena* arena, @@ -609,7 +609,7 @@ TArenaAllocatedValue TCsvParser::BuildListOnArena( } // Return a TValue that references the arena-allocated message - return TArenaAllocatedValue(ResultListType.value(), value); + return TValue(ResultListType.value(), value); } // Helper method to process a single CSV line diff --git a/ydb/public/lib/ydb_cli/common/csv_parser.h b/ydb/public/lib/ydb_cli/common/csv_parser.h index faf9d5cb11f3..a2355b991bf4 100644 --- a/ydb/public/lib/ydb_cli/common/csv_parser.h +++ b/ydb/public/lib/ydb_cli/common/csv_parser.h @@ -72,7 +72,7 @@ class TCsvParser { TValue BuildList(const std::vector& lines, const TString& filename, std::optional row = std::nullopt) const; - TArenaAllocatedValue BuildListOnArena( + TValue BuildListOnArena( const std::vector& lines, const TString& filename, google::protobuf::Arena* arena, diff --git a/ydb/public/lib/ydb_cli/import/import.cpp b/ydb/public/lib/ydb_cli/import/import.cpp index ebb0421929ae..6327dbb8f65a 100644 --- a/ydb/public/lib/ydb_cli/import/import.cpp +++ b/ydb/public/lib/ydb_cli/import/import.cpp @@ -569,7 +569,7 @@ class TImportFileClient::TImpl { ); TAsyncStatus UpsertTValueBufferOnArena( - const TString& dbPath, std::function&& buildFunc); + const TString& dbPath, std::function&& buildFunc); TStatus UpsertJson(IInputStream &input, const TString &dbPath, std::optional inputSizeHint, ProgressCallbackFunc & progressCallback); @@ -1073,11 +1073,11 @@ TAsyncStatus TImportFileClient::TImpl::UpsertTValueBufferParquet( inline TAsyncStatus TImportFileClient::TImpl::UpsertTValueBufferOnArena( - const TString& dbPath, std::function&& buildFunc) { + const TString& dbPath, std::function&& buildFunc) { auto arena = std::make_shared(); // For the first attempt values are built before acquiring request inflight semaphore - std::optional prebuiltValue = buildFunc(arena.get()); + std::optional prebuiltValue = buildFunc(arena.get()); auto retryFunc = [this, &dbPath, buildFunc = std::move(buildFunc), prebuiltValue = std::move(prebuiltValue), arena = std::move(arena)] @@ -1085,8 +1085,9 @@ TAsyncStatus TImportFileClient::TImpl::UpsertTValueBufferOnArena( auto buildTValueAndSendRequest = [this, &buildFunc, &dbPath, &tableClient, &prebuiltValue, arena]() { // For every retry attempt after first request build value from strings again // to prevent copying data in retryFunc in a happy way when there is only one request - TArenaAllocatedValue builtValue = prebuiltValue.has_value() ? std::move(prebuiltValue.value()) : buildFunc(arena.get()); + TValue builtValue = prebuiltValue.has_value() ? std::move(prebuiltValue.value()) : buildFunc(arena.get()); prebuiltValue = std::nullopt; + return tableClient.BulkUpsertUnretryableArenaAllocated( dbPath, std::move(builtValue), arena.get(), UpsertSettings) .Apply([](const NYdb::NTable::TAsyncBulkUpsertResult& bulkUpsertResult) { diff --git a/ydb/public/sdk/cpp/include/ydb-cpp-sdk/client/table/table.h b/ydb/public/sdk/cpp/include/ydb-cpp-sdk/client/table/table.h index f2fdd5a64758..4e488cf18b98 100644 --- a/ydb/public/sdk/cpp/include/ydb-cpp-sdk/client/table/table.h +++ b/ydb/public/sdk/cpp/include/ydb-cpp-sdk/client/table/table.h @@ -1269,7 +1269,7 @@ class TTableClient { TAsyncBulkUpsertResult BulkUpsertUnretryableArenaAllocated( const std::string& table, - TArenaAllocatedValue&& rows, + TValue&& rows, google::protobuf::Arena* arena, const TBulkUpsertSettings& settings ); diff --git a/ydb/public/sdk/cpp/include/ydb-cpp-sdk/client/value/value.h b/ydb/public/sdk/cpp/include/ydb-cpp-sdk/client/value/value.h index c4367cad1202..a142779d8633 100644 --- a/ydb/public/sdk/cpp/include/ydb-cpp-sdk/client/value/value.h +++ b/ydb/public/sdk/cpp/include/ydb-cpp-sdk/client/value/value.h @@ -271,6 +271,7 @@ class TValue { public: TValue(const TType& type, const Ydb::Value& valueProto); TValue(const TType& type, Ydb::Value&& valueProto); + TValue(const TType& type, Ydb::Value* arenaAllocatedValueProto); const TType& GetType() const; TType& GetType(); @@ -286,23 +287,6 @@ class TValue { }; -class TArenaAllocatedValue { -public: - TArenaAllocatedValue(const TType& type, Ydb::Value* value); - - const TType& GetType() const; - TType& GetType(); - - const Ydb::Value* GetProto() const; - Ydb::Value* GetProto(); - - Ydb::Value&& ExtractProto() &&; - -private: - class TImpl; - std::shared_ptr Impl_; -}; - class TValueParser : public TMoveOnly { friend class TResultSetParser; public: diff --git a/ydb/public/sdk/cpp/src/client/table/impl/table_client.cpp b/ydb/public/sdk/cpp/src/client/table/impl/table_client.cpp index d5dceab74d65..5cf912bada51 100644 --- a/ydb/public/sdk/cpp/src/client/table/impl/table_client.cpp +++ b/ydb/public/sdk/cpp/src/client/table/impl/table_client.cpp @@ -1018,7 +1018,7 @@ TAsyncBulkUpsertResult TTableClient::TImpl::BulkUpsertUnretryable(const std::str TAsyncBulkUpsertResult TTableClient::TImpl::BulkUpsertUnretryableArenaAllocated( const std::string& table, - TArenaAllocatedValue&& rows, + TValue&& rows, google::protobuf::Arena* arena, const TBulkUpsertSettings& settings ) { diff --git a/ydb/public/sdk/cpp/src/client/table/impl/table_client.h b/ydb/public/sdk/cpp/src/client/table/impl/table_client.h index 5acbd4e3730f..5530847956be 100644 --- a/ydb/public/sdk/cpp/src/client/table/impl/table_client.h +++ b/ydb/public/sdk/cpp/src/client/table/impl/table_client.h @@ -144,9 +144,20 @@ class TTableClient::TImpl: public TClientImplCommon, public */ TAsyncBulkUpsertResult BulkUpsertUnretryable(const std::string& table, TValue&& rows, const TBulkUpsertSettings& settings); + /** + * @brief Upsert data into table moving `Ydb::Value` from `rows` into request proto model, + * therefore, it is not retryable with the same `rows` object. + * + * The `Ydb::Value` is expected to be arena-allocated. + * + * See: https://protobuf.dev/reference/cpp/arenas + * + * @param arena The arena to allocate the `Ydb::Value` in. + * @param settings The settings for the bulk upsert operation. + */ TAsyncBulkUpsertResult BulkUpsertUnretryableArenaAllocated( const std::string& table, - TArenaAllocatedValue&& rows, + TValue&& rows, google::protobuf::Arena* arena, const TBulkUpsertSettings& settings ); diff --git a/ydb/public/sdk/cpp/src/client/table/table.cpp b/ydb/public/sdk/cpp/src/client/table/table.cpp index a3aa90c16616..66c4dd21e843 100644 --- a/ydb/public/sdk/cpp/src/client/table/table.cpp +++ b/ydb/public/sdk/cpp/src/client/table/table.cpp @@ -1463,8 +1463,9 @@ TAsyncBulkUpsertResult TTableClient::BulkUpsertUnretryable(const std::string& ta return Impl_->BulkUpsertUnretryable(table, std::move(rows), settings); } -TAsyncBulkUpsertResult TTableClient::BulkUpsertUnretryableArenaAllocated(const std::string& table, - TArenaAllocatedValue&& rows, +TAsyncBulkUpsertResult TTableClient::BulkUpsertUnretryableArenaAllocated( + const std::string& table, + TValue&& rows, google::protobuf::Arena* arena, const TBulkUpsertSettings& settings) { diff --git a/ydb/public/sdk/cpp/src/client/value/value.cpp b/ydb/public/sdk/cpp/src/client/value/value.cpp index 8626e4b709de..94e16990b310 100644 --- a/ydb/public/sdk/cpp/src/client/value/value.cpp +++ b/ydb/public/sdk/cpp/src/client/value/value.cpp @@ -1050,32 +1050,28 @@ class TValue::TImpl { public: TImpl(const TType& type, const Ydb::Value& valueProto) : Type_(type) - , ProtoValue_(valueProto) {} + , ProtoValue_(valueProto) + , ArenaAllocatedValueProto_(nullptr) {} TImpl(const TType& type, Ydb::Value&& valueProto) : Type_(type) - , ProtoValue_(std::move(valueProto)) {} - - TType Type_; - Ydb::Value ProtoValue_; -}; - -/** -* Lifetime of the arena, and hence the `Ydb::Value`, is expected to be managed by the caller. -*/ -class TArenaAllocatedValue::TImpl { -public: - TImpl(const TType& type, Ydb::Value* value) + , ProtoValue_(std::move(valueProto)) + , ArenaAllocatedValueProto_(nullptr) {} + + /** + * Lifetime of the arena, and hence the `Ydb::Value`, is expected to be managed by the caller. + * The `Ydb::Value` is expected to be arena-allocated. + * + * See: https://protobuf.dev/reference/cpp/arenas + */ + TImpl(const TType& type, Ydb::Value* arenaAllocatedValueProto) : Type_(type) - , ArenaAllocatedProtoValue_(value) { - // assert that the value is indeed arena-allocated - Y_ASSERT(ArenaAllocatedProtoValue_ != nullptr); - Y_ASSERT(ArenaAllocatedProtoValue_->GetArena() != nullptr); - } + , ProtoValue_{} + , ArenaAllocatedValueProto_(arenaAllocatedValueProto) {} - // TODO: make Ydb::Type also arena-allocated? TType Type_; - Ydb::Value* ArenaAllocatedProtoValue_; + Ydb::Value ProtoValue_; + Ydb::Value* ArenaAllocatedValueProto_; }; //////////////////////////////////////////////////////////////////////////////// @@ -1086,6 +1082,9 @@ TValue::TValue(const TType& type, const Ydb::Value& valueProto) TValue::TValue(const TType& type, Ydb::Value&& valueProto) : Impl_(new TImpl(type, std::move(valueProto))) {} +TValue::TValue(const TType& type, Ydb::Value* arenaAllocatedValueProto) + : Impl_(new TImpl(type, arenaAllocatedValueProto)) {} + const TType& TValue::GetType() const { return Impl_->Type_; } @@ -1106,30 +1105,6 @@ Ydb::Value&& TValue::ExtractProto() && { return std::move(Impl_->ProtoValue_); } - -TArenaAllocatedValue::TArenaAllocatedValue(const TType& type, Ydb::Value* value) - : Impl_(new TImpl(type, value)) {} - -const TType& TArenaAllocatedValue::GetType() const { - return Impl_->Type_; -} - -TType& TArenaAllocatedValue::GetType() { - return Impl_->Type_; -} - -const Ydb::Value* TArenaAllocatedValue::GetProto() const { - return Impl_->ArenaAllocatedProtoValue_; -} - -Ydb::Value* TArenaAllocatedValue::GetProto() { - return Impl_->ArenaAllocatedProtoValue_; -} - -Ydb::Value&& TArenaAllocatedValue::ExtractProto() && { - return std::move(*Impl_->ArenaAllocatedProtoValue_); -} - //////////////////////////////////////////////////////////////////////////////// class TValueParser::TImpl { From 4836e43794486610446a2667595b46fb0ddd47ac Mon Sep 17 00:00:00 2001 From: Vladislav Artiukhov Date: Fri, 13 Jun 2025 18:16:25 +0000 Subject: [PATCH 28/35] feat: rename unretryable methods to BulkUpsertUnsafe and add docs --- ydb/public/lib/ydb_cli/import/import.cpp | 4 +- .../include/ydb-cpp-sdk/client/table/table.h | 63 +++++++++++++++++-- .../src/client/table/impl/table_client.cpp | 5 +- .../cpp/src/client/table/impl/table_client.h | 21 +------ ydb/public/sdk/cpp/src/client/table/table.cpp | 8 +-- 5 files changed, 70 insertions(+), 31 deletions(-) diff --git a/ydb/public/lib/ydb_cli/import/import.cpp b/ydb/public/lib/ydb_cli/import/import.cpp index 6327dbb8f65a..b001a51ec8bf 100644 --- a/ydb/public/lib/ydb_cli/import/import.cpp +++ b/ydb/public/lib/ydb_cli/import/import.cpp @@ -995,7 +995,7 @@ TAsyncStatus TImportFileClient::TImpl::UpsertTValueBuffer(const TString& dbPath, // to prevent copying data in retryFunc in a happy way when there is only one request TValue builtValue = prebuiltValue.has_value() ? std::move(prebuiltValue.value()) : buildFunc(); prebuiltValue = std::nullopt; - return tableClient.BulkUpsertUnretryable(dbPath, std::move(builtValue), UpsertSettings) + return tableClient.BulkUpsertUnretryableUnsafe(dbPath, std::move(builtValue), UpsertSettings) .Apply([](const NYdb::NTable::TAsyncBulkUpsertResult& bulkUpsertResult) { NYdb::TStatus status = bulkUpsertResult.GetValueSync(); return NThreading::MakeFuture(status); @@ -1088,7 +1088,7 @@ TAsyncStatus TImportFileClient::TImpl::UpsertTValueBufferOnArena( TValue builtValue = prebuiltValue.has_value() ? std::move(prebuiltValue.value()) : buildFunc(arena.get()); prebuiltValue = std::nullopt; - return tableClient.BulkUpsertUnretryableArenaAllocated( + return tableClient.BulkUpsertUnretryableArenaAllocatedUnsafe( dbPath, std::move(builtValue), arena.get(), UpsertSettings) .Apply([](const NYdb::NTable::TAsyncBulkUpsertResult& bulkUpsertResult) { NYdb::TStatus status = bulkUpsertResult.GetValueSync(); diff --git a/ydb/public/sdk/cpp/include/ydb-cpp-sdk/client/table/table.h b/ydb/public/sdk/cpp/include/ydb-cpp-sdk/client/table/table.h index 4e488cf18b98..3e1dcf28dc15 100644 --- a/ydb/public/sdk/cpp/include/ydb-cpp-sdk/client/table/table.h +++ b/ydb/public/sdk/cpp/include/ydb-cpp-sdk/client/table/table.h @@ -1261,13 +1261,68 @@ class TTableClient { NThreading::TFuture Stop(); /** - * @brief Upsert data into table moving `Ydb::Value` from `rows` into request proto model, - * therefore, it is not retryable with the same `rows` object. + * @brief Upsert data into table moving `Ydb::Value` from `rows` into request proto model. + * + * This method is not retryable with the same `rows` object because it moves the underlying data + * from the TValue object into the request proto model. The TValue class uses a shared pointer + * to store its implementation data, so when you make a copy of a TValue object, both copies + * share the same underlying data. When this method moves the data from one TValue instance, + * all other instances sharing the same implementation pointer will be left with moved-from data. + * + * Example of incorrect usage: + * ``` + * TValue originalValue = BuildValue(); + * retry { + * TValue copy = originalValue; // Both originalValue and copy share the same data + * client.BulkUpsertUnretryableUnsafe(std::move(copy)); // Moves data from copy, leaving originalValue in moved-from state + * } + * ``` + * CAUTION: It is unsafe to use the instance of `originalValue` because its underlying data is moved! + * + * To safely retry the operation, you need to create a new TValue with fresh data for each attempt. */ - TAsyncBulkUpsertResult BulkUpsertUnretryable(const std::string& table, TValue&& rows, + TAsyncBulkUpsertResult BulkUpsertUnretryableUnsafe(const std::string& table, TValue&& rows, const TBulkUpsertSettings& settings); - TAsyncBulkUpsertResult BulkUpsertUnretryableArenaAllocated( + /** + * @brief Upsert data into table moving `Ydb::Value` from `rows` into request proto model. The `Ydb::Value` of `TValue` must be arena-allocated. + * + * This method is not retryable with the same `rows` object because it moves the underlying data + * from the `TValue` object into the request proto model. The `TValue` class uses a shared pointer + * to store its implementation data, so when you make a copy of a `TValue` object, both copies + * share the same underlying data. When this method moves the data from one `TValue` instance, + * all other instances sharing the same implementation pointer will be left with moved-from data. + * + * IMPORTANT: The `rows` parameter must contain arena-allocated protobuf data. This method + * should only be called when the `TValue`'s underlying data was allocated using the provided + * protobuf arena. Using this method with non-arena-allocated data will lead to undefined behavior. + * It is the caller's responsibility to ensure that the lifetime of the used arena is longer than the lifetime of the `TValue` object during the operation. + * + * @see: https://protobuf.dev/reference/cpp/arenas + * + * Example of incorrect usage: + * ``` + * TValue originalValue = BuildValue(); // Not arena-allocated + * retry { + * TValue copy = originalValue; // Both originalValue and copy share the same data + * client.BulkUpsertUnretryableArenaAllocated(std::move(copy), arena); // WRONG: data not arena-allocated + * } + * ``` + * + * Example of correct usage: + * ``` + * google::protobuf::Arena arena; + * TValue arenaValue = BuildValueOnArena(&arena); // Value built using arena allocation + * client.BulkUpsertUnretryableArenaAllocated(std::move(arenaValue), &arena); + * ``` + * + * To safely retry the operation, you need to create a new `TValue` with fresh arena-allocated data for each attempt. + * + * @param arena The protobuf arena that was used to allocate the data in `rows`. Must be the same + * arena that was used to create the TValue's underlying data. + * @param settings The settings for the bulk upsert operation. + */ + TAsyncBulkUpsertResult BulkUpsertUnretryableArenaAllocatedUnsafe( const std::string& table, TValue&& rows, google::protobuf::Arena* arena, diff --git a/ydb/public/sdk/cpp/src/client/table/impl/table_client.cpp b/ydb/public/sdk/cpp/src/client/table/impl/table_client.cpp index 5cf912bada51..fcca6eb36bd3 100644 --- a/ydb/public/sdk/cpp/src/client/table/impl/table_client.cpp +++ b/ydb/public/sdk/cpp/src/client/table/impl/table_client.cpp @@ -989,7 +989,7 @@ void TTableClient::TImpl::SetStatCollector(const NSdkStats::TStatCollector::TCli SessionRemovedDueBalancing.Set(collector.SessionRemovedDueBalancing); } -TAsyncBulkUpsertResult TTableClient::TImpl::BulkUpsertUnretryable(const std::string& table, TValue&& rows, const TBulkUpsertSettings& settings) { +TAsyncBulkUpsertResult TTableClient::TImpl::BulkUpsertUnretryableUnsafe(const std::string& table, TValue&& rows, const TBulkUpsertSettings& settings) { auto request = MakeOperationRequest(settings); request.set_table(TStringType{table}); *request.mutable_rows()->mutable_type() = std::move(rows.GetType()).ExtractProto(); @@ -1016,7 +1016,7 @@ TAsyncBulkUpsertResult TTableClient::TImpl::BulkUpsertUnretryable(const std::str } -TAsyncBulkUpsertResult TTableClient::TImpl::BulkUpsertUnretryableArenaAllocated( +TAsyncBulkUpsertResult TTableClient::TImpl::BulkUpsertUnretryableArenaAllocatedUnsafe( const std::string& table, TValue&& rows, google::protobuf::Arena* arena, @@ -1024,7 +1024,6 @@ TAsyncBulkUpsertResult TTableClient::TImpl::BulkUpsertUnretryableArenaAllocated( ) { auto request = MakeOperationRequestOnArena(settings, arena); request->set_table(TStringType{table}); - // TODO: Ydb::Type still gets copied because request is arena-allocated and rows' Type is not *request->mutable_rows()->mutable_type() = std::move(rows.GetType()).ExtractProto(); *request->mutable_rows()->mutable_value() = std::move(rows).ExtractProto(); diff --git a/ydb/public/sdk/cpp/src/client/table/impl/table_client.h b/ydb/public/sdk/cpp/src/client/table/impl/table_client.h index 5530847956be..2a7e8c43a9e2 100644 --- a/ydb/public/sdk/cpp/src/client/table/impl/table_client.h +++ b/ydb/public/sdk/cpp/src/client/table/impl/table_client.h @@ -138,24 +138,9 @@ class TTableClient::TImpl: public TClientImplCommon, public void SetStatCollector(const NSdkStats::TStatCollector::TClientStatCollector& collector); - /** - * @brief Upsert data into table moving `Ydb::Value` from `rows` into request proto model, - * therefore, it is not retryable with the same `rows` object. - */ - TAsyncBulkUpsertResult BulkUpsertUnretryable(const std::string& table, TValue&& rows, const TBulkUpsertSettings& settings); - - /** - * @brief Upsert data into table moving `Ydb::Value` from `rows` into request proto model, - * therefore, it is not retryable with the same `rows` object. - * - * The `Ydb::Value` is expected to be arena-allocated. - * - * See: https://protobuf.dev/reference/cpp/arenas - * - * @param arena The arena to allocate the `Ydb::Value` in. - * @param settings The settings for the bulk upsert operation. - */ - TAsyncBulkUpsertResult BulkUpsertUnretryableArenaAllocated( + TAsyncBulkUpsertResult BulkUpsertUnretryableUnsafe(const std::string& table, TValue&& rows, const TBulkUpsertSettings& settings); + + TAsyncBulkUpsertResult BulkUpsertUnretryableArenaAllocatedUnsafe( const std::string& table, TValue&& rows, google::protobuf::Arena* arena, diff --git a/ydb/public/sdk/cpp/src/client/table/table.cpp b/ydb/public/sdk/cpp/src/client/table/table.cpp index 66c4dd21e843..a6af236d313c 100644 --- a/ydb/public/sdk/cpp/src/client/table/table.cpp +++ b/ydb/public/sdk/cpp/src/client/table/table.cpp @@ -1457,19 +1457,19 @@ NThreading::TFuture TTableClient::Stop() { return Impl_->Stop(); } -TAsyncBulkUpsertResult TTableClient::BulkUpsertUnretryable(const std::string& table, TValue&& rows, +TAsyncBulkUpsertResult TTableClient::BulkUpsertUnretryableUnsafe(const std::string& table, TValue&& rows, const TBulkUpsertSettings& settings) { - return Impl_->BulkUpsertUnretryable(table, std::move(rows), settings); + return Impl_->BulkUpsertUnretryableUnsafe(table, std::move(rows), settings); } -TAsyncBulkUpsertResult TTableClient::BulkUpsertUnretryableArenaAllocated( +TAsyncBulkUpsertResult TTableClient::BulkUpsertUnretryableArenaAllocatedUnsafe( const std::string& table, TValue&& rows, google::protobuf::Arena* arena, const TBulkUpsertSettings& settings) { - return Impl_->BulkUpsertUnretryableArenaAllocated(table, std::move(rows), arena, settings); + return Impl_->BulkUpsertUnretryableArenaAllocatedUnsafe(table, std::move(rows), arena, settings); } TAsyncBulkUpsertResult TTableClient::BulkUpsert(const std::string& table, TValue&& rows, From 8f316a9b065b937ca33a9b922698fce051e46be7 Mon Sep 17 00:00:00 2001 From: Vladislav Artiukhov Date: Fri, 13 Jun 2025 18:48:10 +0000 Subject: [PATCH 29/35] refactor: move logic of RunDeferred and RUnDeferredOnArena into impl method --- .../grpc_connections/grpc_connections.h | 161 +++++++++--------- 1 file changed, 77 insertions(+), 84 deletions(-) diff --git a/ydb/public/sdk/cpp/src/client/impl/ydb_internal/grpc_connections/grpc_connections.h b/ydb/public/sdk/cpp/src/client/impl/ydb_internal/grpc_connections/grpc_connections.h index 3e4c4c2c8c38..2a1f3346370f 100644 --- a/ydb/public/sdk/cpp/src/client/impl/ydb_internal/grpc_connections/grpc_connections.h +++ b/ydb/public/sdk/cpp/src/client/impl/ydb_internal/grpc_connections/grpc_connections.h @@ -276,10 +276,6 @@ class TGRpcConnectionsImpl }, dbState, requestSettings.PreferredEndpoint, requestSettings.EndpointPolicy); } - - - - template void RunOnArena( TRequest* request, @@ -416,10 +412,6 @@ class TGRpcConnectionsImpl }, dbState, requestSettings.PreferredEndpoint, requestSettings.EndpointPolicy); } - - - - template void RunDeferred( TRequest&& request, @@ -429,48 +421,17 @@ class TGRpcConnectionsImpl TDuration deferredTimeout, const TRpcRequestSettings& requestSettings, bool poll = false, - std::shared_ptr context = nullptr) - { - if (!TryCreateContext(context)) { - TPlainStatus status(EStatus::CLIENT_CANCELLED, "Client is stopped"); - userResponseCb(nullptr, status); - return; - } - - auto responseCb = [this, userResponseCb = std::move(userResponseCb), dbState, deferredTimeout, poll, context] - (TResponse* response, TPlainStatus status) mutable - { - if (response) { - Ydb::Operations::Operation* operation = response->mutable_operation(); - if (!operation->ready() && poll) { - auto action = MakeIntrusive( - operation->id(), - std::move(userResponseCb), - this, - std::move(context), - deferredTimeout, - dbState, - status.Endpoint); - - action->Start(); - } else { - NYdb::NIssue::TIssues opIssues; - NYdb::NIssue::IssuesFromMessage(operation->issues(), opIssues); - userResponseCb(operation, TPlainStatus{static_cast(operation->status()), std::move(opIssues), - status.Endpoint, std::move(status.Metadata)}); - } - } else { - userResponseCb(nullptr, status); - } - }; - - Run( + std::shared_ptr context = nullptr) { + RunDeferredImpl( std::forward(request), - responseCb, + std::move(userResponseCb), rpc, dbState, + deferredTimeout, requestSettings, - std::move(context)); + poll, + std::move(context) + ); } template @@ -482,48 +443,17 @@ class TGRpcConnectionsImpl TDuration deferredTimeout, const TRpcRequestSettings& requestSettings, bool poll = false, - std::shared_ptr context = nullptr) - { - if (!TryCreateContext(context)) { - TPlainStatus status(EStatus::CLIENT_CANCELLED, "Client is stopped"); - userResponseCb(nullptr, status); - return; - } - - auto responseCb = [this, userResponseCb = std::move(userResponseCb), dbState, deferredTimeout, poll, context] - (TResponse* response, TPlainStatus status) mutable - { - if (response) { - Ydb::Operations::Operation* operation = response->mutable_operation(); - if (!operation->ready() && poll) { - auto action = MakeIntrusive( - operation->id(), - std::move(userResponseCb), - this, - std::move(context), - deferredTimeout, - dbState, - status.Endpoint); - - action->Start(); - } else { - NYdb::NIssue::TIssues opIssues; - NYdb::NIssue::IssuesFromMessage(operation->issues(), opIssues); - userResponseCb(operation, TPlainStatus{static_cast(operation->status()), std::move(opIssues), - status.Endpoint, std::move(status.Metadata)}); - } - } else { - userResponseCb(nullptr, status); - } - }; - - RunOnArena( + std::shared_ptr context = nullptr) { + RunDeferredImpl( request, - responseCb, + std::move(userResponseCb), rpc, dbState, + deferredTimeout, requestSettings, - std::move(context)); + poll, + std::move(context) + ); } // Run request using discovery endpoint. @@ -836,6 +766,69 @@ template const TLog& GetLog() const override; private: + template + void RunDeferredImpl( + std::conditional_t request, + TDeferredOperationCb&& userResponseCb, + TSimpleRpc rpc, + TDbDriverStatePtr dbState, + TDuration deferredTimeout, + const TRpcRequestSettings& requestSettings, + bool poll = false, + std::shared_ptr context = nullptr) { + if (!TryCreateContext(context)) { + TPlainStatus status(EStatus::CLIENT_CANCELLED, "Client is stopped"); + userResponseCb(nullptr, status); + return; + } + + auto responseCb = [this, userResponseCb = std::move(userResponseCb), dbState, deferredTimeout, poll, context] + (TResponse* response, TPlainStatus status) mutable + { + if (response) { + Ydb::Operations::Operation* operation = response->mutable_operation(); + if (!operation->ready() && poll) { + auto action = MakeIntrusive( + operation->id(), + std::move(userResponseCb), + this, + std::move(context), + deferredTimeout, + dbState, + status.Endpoint); + + action->Start(); + } else { + NYdb::NIssue::TIssues opIssues; + NYdb::NIssue::IssuesFromMessage(operation->issues(), opIssues); + userResponseCb(operation, TPlainStatus{static_cast(operation->status()), std::move(opIssues), + status.Endpoint, std::move(status.Metadata)}); + } + } else { + userResponseCb(nullptr, status); + } + }; + + if constexpr (RequestOnArena) { + RunOnArena( + request, + responseCb, + rpc, + dbState, + requestSettings, + std::move(context)); + } + else { + Run( + std::forward(request), + responseCb, + rpc, + dbState, + requestSettings, + std::move(context)); + } + } + template void WithServiceConnection(TCallback callback, TDbDriverStatePtr dbState, const TEndpointKey& preferredEndpoint, TRpcRequestSettings::TEndpointPolicy endpointPolicy) From e1bdb1fccd93907e4405dced7f4ea3f7b740982f Mon Sep 17 00:00:00 2001 From: Vladislav Artiukhov Date: Fri, 13 Jun 2025 19:23:32 +0000 Subject: [PATCH 30/35] refactor: move logic of Run and RunOnArena into impl method (untested!) Requires testing via a real instance of YDBD to ensure that the refactoring is correct. --- .../grpc_connections/grpc_connections.h | 698 +++++++++++------- 1 file changed, 450 insertions(+), 248 deletions(-) diff --git a/ydb/public/sdk/cpp/src/client/impl/ydb_internal/grpc_connections/grpc_connections.h b/ydb/public/sdk/cpp/src/client/impl/ydb_internal/grpc_connections/grpc_connections.h index 2a1f3346370f..123472509e17 100644 --- a/ydb/public/sdk/cpp/src/client/impl/ydb_internal/grpc_connections/grpc_connections.h +++ b/ydb/public/sdk/cpp/src/client/impl/ydb_internal/grpc_connections/grpc_connections.h @@ -140,6 +140,7 @@ class TGRpcConnectionsImpl TRequest, TResponse>::TAsyncRequest; + template void Run( TRequest&& request, @@ -149,133 +150,143 @@ class TGRpcConnectionsImpl const TRpcRequestSettings& requestSettings, std::shared_ptr context = nullptr) { - using NYdbGrpc::TGrpcStatus; - using TConnection = std::unique_ptr>; - Y_ABORT_UNLESS(dbState); - - if (!TryCreateContext(context)) { - TPlainStatus status(EStatus::CLIENT_CANCELLED, "Client is stopped"); - userResponseCb(nullptr, TPlainStatus{status.Status, std::move(status.Issues)}); - return; - } - - if (dbState->StatCollector.IsCollecting()) { - std::weak_ptr weakState = dbState; - const auto startTime = TInstant::Now(); - userResponseCb = std::move([cb = std::move(userResponseCb), weakState, startTime](TResponse* response, TPlainStatus status) { - const auto resultSize = response ? response->ByteSizeLong() : 0; - cb(response, status); - - if (auto state = weakState.lock()) { - state->StatCollector.IncRequestLatency(TInstant::Now() - startTime); - state->StatCollector.IncResultSize(resultSize); - } - }); - } - - WithServiceConnection( - [this, request = std::forward(request), userResponseCb = std::move(userResponseCb), rpc, requestSettings, context = std::move(context), dbState] - (TPlainStatus status, TConnection serviceConnection, TEndpointKey endpoint) mutable -> void { - if (!status.Ok()) { - userResponseCb( - nullptr, - std::move(status)); - return; - } - - TCallMeta meta; - meta.Timeout = requestSettings.ClientTimeout; - #ifndef YDB_GRPC_UNSECURE_AUTH - meta.CallCredentials = dbState->CallCredentials; - #else - if (requestSettings.UseAuth && dbState->CredentialsProvider && dbState->CredentialsProvider->IsValid()) { - try { - meta.Aux.push_back({ YDB_AUTH_TICKET_HEADER, GetAuthInfo(dbState) }); - } catch (const std::exception& e) { - userResponseCb( - nullptr, - TPlainStatus( - EStatus::CLIENT_UNAUTHENTICATED, - TStringBuilder() << "Can't get Authentication info from CredentialsProvider. " << e.what() - ) - ); - return; - } - } - #endif - if (!requestSettings.TraceId.empty()) { - meta.Aux.push_back({YDB_TRACE_ID_HEADER, requestSettings.TraceId}); - } - - if (!requestSettings.RequestType.empty()) { - meta.Aux.push_back({YDB_REQUEST_TYPE_HEADER, requestSettings.RequestType}); - } - - if (!dbState->Database.empty()) { - SetDatabaseHeader(meta, dbState->Database); - } - - static const std::string clientPid = GetClientPIDHeaderValue(); - - meta.Aux.push_back({YDB_SDK_BUILD_INFO_HEADER, CreateSDKBuildInfo()}); - meta.Aux.push_back({YDB_CLIENT_PID, clientPid}); - meta.Aux.insert(meta.Aux.end(), requestSettings.Header.begin(), requestSettings.Header.end()); - - dbState->StatCollector.IncGRpcInFlight(); - dbState->StatCollector.IncGRpcInFlightByHost(endpoint.GetEndpoint()); - - NYdbGrpc::TAdvancedResponseCallback responseCbLow = - [this, context, userResponseCb = std::move(userResponseCb), endpoint, dbState] - (const grpc::ClientContext& ctx, TGrpcStatus&& grpcStatus, TResponse&& response) mutable -> void { - dbState->StatCollector.DecGRpcInFlight(); - dbState->StatCollector.DecGRpcInFlightByHost(endpoint.GetEndpoint()); - - if (NYdbGrpc::IsGRpcStatusGood(grpcStatus)) { - std::multimap metadata; - - for (const auto& [name, value] : ctx.GetServerInitialMetadata()) { - metadata.emplace( - std::string(name.begin(), name.end()), - std::string(value.begin(), value.end())); - } - for (const auto& [name, value] : ctx.GetServerTrailingMetadata()) { - metadata.emplace( - std::string(name.begin(), name.end()), - std::string(value.begin(), value.end())); - } - - auto resp = new TResult( - std::move(response), - std::move(grpcStatus), - std::move(userResponseCb), - this, - std::move(context), - endpoint.GetEndpoint(), - std::move(metadata)); - - EnqueueResponse(resp); - } else { - dbState->StatCollector.IncReqFailDueTransportError(); - dbState->StatCollector.IncTransportErrorsByHost(endpoint.GetEndpoint()); - - auto resp = new TGRpcErrorResponse( - std::move(grpcStatus), - std::move(userResponseCb), - this, - std::move(context), - endpoint.GetEndpoint()); - - dbState->EndpointPool.BanEndpoint(endpoint.GetEndpoint()); - - EnqueueResponse(resp); - } - }; + RunImpl( + std::forward(request), + std::move(userResponseCb), + rpc, + dbState, + requestSettings, + std::move(context) + ); - serviceConnection->DoAdvancedRequest(std::move(request), std::move(responseCbLow), rpc, meta, - context.get()); - }, dbState, requestSettings.PreferredEndpoint, requestSettings.EndpointPolicy); + // using NYdbGrpc::TGrpcStatus; + // using TConnection = std::unique_ptr>; + // Y_ABORT_UNLESS(dbState); + + // if (!TryCreateContext(context)) { + // TPlainStatus status(EStatus::CLIENT_CANCELLED, "Client is stopped"); + // userResponseCb(nullptr, TPlainStatus{status.Status, std::move(status.Issues)}); + // return; + // } + + // if (dbState->StatCollector.IsCollecting()) { + // std::weak_ptr weakState = dbState; + // const auto startTime = TInstant::Now(); + // userResponseCb = std::move([cb = std::move(userResponseCb), weakState, startTime](TResponse* response, TPlainStatus status) { + // const auto resultSize = response ? response->ByteSizeLong() : 0; + // cb(response, status); + + // if (auto state = weakState.lock()) { + // state->StatCollector.IncRequestLatency(TInstant::Now() - startTime); + // state->StatCollector.IncResultSize(resultSize); + // } + // }); + // } + + // WithServiceConnection( + // [this, request = std::forward(request), userResponseCb = std::move(userResponseCb), rpc, requestSettings, context = std::move(context), dbState] + // (TPlainStatus status, TConnection serviceConnection, TEndpointKey endpoint) mutable -> void { + // if (!status.Ok()) { + // userResponseCb( + // nullptr, + // std::move(status)); + // return; + // } + + // TCallMeta meta; + // meta.Timeout = requestSettings.ClientTimeout; + // #ifndef YDB_GRPC_UNSECURE_AUTH + // meta.CallCredentials = dbState->CallCredentials; + // #else + // if (requestSettings.UseAuth && dbState->CredentialsProvider && dbState->CredentialsProvider->IsValid()) { + // try { + // meta.Aux.push_back({ YDB_AUTH_TICKET_HEADER, GetAuthInfo(dbState) }); + // } catch (const std::exception& e) { + // userResponseCb( + // nullptr, + // TPlainStatus( + // EStatus::CLIENT_UNAUTHENTICATED, + // TStringBuilder() << "Can't get Authentication info from CredentialsProvider. " << e.what() + // ) + // ); + // return; + // } + // } + // #endif + // if (!requestSettings.TraceId.empty()) { + // meta.Aux.push_back({YDB_TRACE_ID_HEADER, requestSettings.TraceId}); + // } + + // if (!requestSettings.RequestType.empty()) { + // meta.Aux.push_back({YDB_REQUEST_TYPE_HEADER, requestSettings.RequestType}); + // } + + // if (!dbState->Database.empty()) { + // SetDatabaseHeader(meta, dbState->Database); + // } + + // static const std::string clientPid = GetClientPIDHeaderValue(); + + // meta.Aux.push_back({YDB_SDK_BUILD_INFO_HEADER, CreateSDKBuildInfo()}); + // meta.Aux.push_back({YDB_CLIENT_PID, clientPid}); + // meta.Aux.insert(meta.Aux.end(), requestSettings.Header.begin(), requestSettings.Header.end()); + + // dbState->StatCollector.IncGRpcInFlight(); + // dbState->StatCollector.IncGRpcInFlightByHost(endpoint.GetEndpoint()); + + // NYdbGrpc::TAdvancedResponseCallback responseCbLow = + // [this, context, userResponseCb = std::move(userResponseCb), endpoint, dbState] + // (const grpc::ClientContext& ctx, TGrpcStatus&& grpcStatus, TResponse&& response) mutable -> void { + // dbState->StatCollector.DecGRpcInFlight(); + // dbState->StatCollector.DecGRpcInFlightByHost(endpoint.GetEndpoint()); + + // if (NYdbGrpc::IsGRpcStatusGood(grpcStatus)) { + // std::multimap metadata; + + // for (const auto& [name, value] : ctx.GetServerInitialMetadata()) { + // metadata.emplace( + // std::string(name.begin(), name.end()), + // std::string(value.begin(), value.end())); + // } + // for (const auto& [name, value] : ctx.GetServerTrailingMetadata()) { + // metadata.emplace( + // std::string(name.begin(), name.end()), + // std::string(value.begin(), value.end())); + // } + + // auto resp = new TResult( + // std::move(response), + // std::move(grpcStatus), + // std::move(userResponseCb), + // this, + // std::move(context), + // endpoint.GetEndpoint(), + // std::move(metadata)); + + // EnqueueResponse(resp); + // } else { + // dbState->StatCollector.IncReqFailDueTransportError(); + // dbState->StatCollector.IncTransportErrorsByHost(endpoint.GetEndpoint()); + + // auto resp = new TGRpcErrorResponse( + // std::move(grpcStatus), + // std::move(userResponseCb), + // this, + // std::move(context), + // endpoint.GetEndpoint()); + + // dbState->EndpointPool.BanEndpoint(endpoint.GetEndpoint()); + + // EnqueueResponse(resp); + // } + // }; + + // serviceConnection->DoAdvancedRequest(std::move(request), std::move(responseCbLow), rpc, meta, + // context.get()); + // }, dbState, requestSettings.PreferredEndpoint, requestSettings.EndpointPolicy); } + template void RunOnArena( TRequest* request, @@ -285,131 +296,142 @@ class TGRpcConnectionsImpl const TRpcRequestSettings& requestSettings, std::shared_ptr context = nullptr) { - using NYdbGrpc::TGrpcStatus; - using TConnection = std::unique_ptr>; - Y_ABORT_UNLESS(dbState); - - if (!TryCreateContext(context)) { - TPlainStatus status(EStatus::CLIENT_CANCELLED, "Client is stopped"); - userResponseCb(nullptr, TPlainStatus{status.Status, std::move(status.Issues)}); - return; - } - - if (dbState->StatCollector.IsCollecting()) { - std::weak_ptr weakState = dbState; - const auto startTime = TInstant::Now(); - userResponseCb = std::move([cb = std::move(userResponseCb), weakState, startTime](TResponse* response, TPlainStatus status) { - const auto resultSize = response ? response->ByteSizeLong() : 0; - cb(response, status); - - if (auto state = weakState.lock()) { - state->StatCollector.IncRequestLatency(TInstant::Now() - startTime); - state->StatCollector.IncResultSize(resultSize); - } - }); - } - - WithServiceConnection( - [this, request, userResponseCb = std::move(userResponseCb), rpc, requestSettings, context = std::move(context), dbState] - (TPlainStatus status, TConnection serviceConnection, TEndpointKey endpoint) mutable -> void { - if (!status.Ok()) { - userResponseCb( - nullptr, - std::move(status)); - return; - } - - TCallMeta meta; - meta.Timeout = requestSettings.ClientTimeout; - #ifndef YDB_GRPC_UNSECURE_AUTH - meta.CallCredentials = dbState->CallCredentials; - #else - if (requestSettings.UseAuth && dbState->CredentialsProvider && dbState->CredentialsProvider->IsValid()) { - try { - meta.Aux.push_back({ YDB_AUTH_TICKET_HEADER, GetAuthInfo(dbState) }); - } catch (const std::exception& e) { - userResponseCb( - nullptr, - TPlainStatus( - EStatus::CLIENT_UNAUTHENTICATED, - TStringBuilder() << "Can't get Authentication info from CredentialsProvider. " << e.what() - ) - ); - return; - } - } - #endif - if (!requestSettings.TraceId.empty()) { - meta.Aux.push_back({YDB_TRACE_ID_HEADER, requestSettings.TraceId}); - } - - if (!requestSettings.RequestType.empty()) { - meta.Aux.push_back({YDB_REQUEST_TYPE_HEADER, requestSettings.RequestType}); - } - - if (!dbState->Database.empty()) { - SetDatabaseHeader(meta, dbState->Database); - } - - static const std::string clientPid = GetClientPIDHeaderValue(); - - meta.Aux.push_back({YDB_SDK_BUILD_INFO_HEADER, CreateSDKBuildInfo()}); - meta.Aux.push_back({YDB_CLIENT_PID, clientPid}); - meta.Aux.insert(meta.Aux.end(), requestSettings.Header.begin(), requestSettings.Header.end()); - - dbState->StatCollector.IncGRpcInFlight(); - dbState->StatCollector.IncGRpcInFlightByHost(endpoint.GetEndpoint()); - - NYdbGrpc::TAdvancedResponseCallback responseCbLow = - [this, context, userResponseCb = std::move(userResponseCb), endpoint, dbState] - (const grpc::ClientContext& ctx, TGrpcStatus&& grpcStatus, TResponse&& response) mutable -> void { - dbState->StatCollector.DecGRpcInFlight(); - dbState->StatCollector.DecGRpcInFlightByHost(endpoint.GetEndpoint()); - - if (NYdbGrpc::IsGRpcStatusGood(grpcStatus)) { - std::multimap metadata; - - for (const auto& [name, value] : ctx.GetServerInitialMetadata()) { - metadata.emplace( - std::string(name.begin(), name.end()), - std::string(value.begin(), value.end())); - } - for (const auto& [name, value] : ctx.GetServerTrailingMetadata()) { - metadata.emplace( - std::string(name.begin(), name.end()), - std::string(value.begin(), value.end())); - } - - auto resp = new TResult( - std::move(response), - std::move(grpcStatus), - std::move(userResponseCb), - this, - std::move(context), - endpoint.GetEndpoint(), - std::move(metadata)); - - EnqueueResponse(resp); - } else { - dbState->StatCollector.IncReqFailDueTransportError(); - dbState->StatCollector.IncTransportErrorsByHost(endpoint.GetEndpoint()); - - auto resp = new TGRpcErrorResponse( - std::move(grpcStatus), - std::move(userResponseCb), - this, - std::move(context), - endpoint.GetEndpoint()); - - dbState->EndpointPool.BanEndpoint(endpoint.GetEndpoint()); - - EnqueueResponse(resp); - } - }; + RunImpl( + request, + std::move(userResponseCb), + rpc, + dbState, + requestSettings, + std::move(context) + ); - serviceConnection->DoAdvancedRequest(*request, std::move(responseCbLow), rpc, meta, - context.get()); - }, dbState, requestSettings.PreferredEndpoint, requestSettings.EndpointPolicy); + // using NYdbGrpc::TGrpcStatus; + // using TConnection = std::unique_ptr>; + // Y_ABORT_UNLESS(dbState); + + // if (!TryCreateContext(context)) { + // TPlainStatus status(EStatus::CLIENT_CANCELLED, "Client is stopped"); + // userResponseCb(nullptr, TPlainStatus{status.Status, std::move(status.Issues)}); + // return; + // } + + // if (dbState->StatCollector.IsCollecting()) { + // std::weak_ptr weakState = dbState; + // const auto startTime = TInstant::Now(); + // userResponseCb = std::move([cb = std::move(userResponseCb), weakState, startTime](TResponse* response, TPlainStatus status) { + // const auto resultSize = response ? response->ByteSizeLong() : 0; + // cb(response, status); + + // if (auto state = weakState.lock()) { + // state->StatCollector.IncRequestLatency(TInstant::Now() - startTime); + // state->StatCollector.IncResultSize(resultSize); + // } + // }); + // } + + + + // WithServiceConnection( + // [this, request, userResponseCb = std::move(userResponseCb), rpc, requestSettings, context = std::move(context), dbState] + // (TPlainStatus status, TConnection serviceConnection, TEndpointKey endpoint) mutable -> void { + // if (!status.Ok()) { + // userResponseCb( + // nullptr, + // std::move(status)); + // return; + // } + + // TCallMeta meta; + // meta.Timeout = requestSettings.ClientTimeout; + // #ifndef YDB_GRPC_UNSECURE_AUTH + // meta.CallCredentials = dbState->CallCredentials; + // #else + // if (requestSettings.UseAuth && dbState->CredentialsProvider && dbState->CredentialsProvider->IsValid()) { + // try { + // meta.Aux.push_back({ YDB_AUTH_TICKET_HEADER, GetAuthInfo(dbState) }); + // } catch (const std::exception& e) { + // userResponseCb( + // nullptr, + // TPlainStatus( + // EStatus::CLIENT_UNAUTHENTICATED, + // TStringBuilder() << "Can't get Authentication info from CredentialsProvider. " << e.what() + // ) + // ); + // return; + // } + // } + // #endif + // if (!requestSettings.TraceId.empty()) { + // meta.Aux.push_back({YDB_TRACE_ID_HEADER, requestSettings.TraceId}); + // } + + // if (!requestSettings.RequestType.empty()) { + // meta.Aux.push_back({YDB_REQUEST_TYPE_HEADER, requestSettings.RequestType}); + // } + + // if (!dbState->Database.empty()) { + // SetDatabaseHeader(meta, dbState->Database); + // } + + // static const std::string clientPid = GetClientPIDHeaderValue(); + + // meta.Aux.push_back({YDB_SDK_BUILD_INFO_HEADER, CreateSDKBuildInfo()}); + // meta.Aux.push_back({YDB_CLIENT_PID, clientPid}); + // meta.Aux.insert(meta.Aux.end(), requestSettings.Header.begin(), requestSettings.Header.end()); + + // dbState->StatCollector.IncGRpcInFlight(); + // dbState->StatCollector.IncGRpcInFlightByHost(endpoint.GetEndpoint()); + + // NYdbGrpc::TAdvancedResponseCallback responseCbLow = + // [this, context, userResponseCb = std::move(userResponseCb), endpoint, dbState] + // (const grpc::ClientContext& ctx, TGrpcStatus&& grpcStatus, TResponse&& response) mutable -> void { + // dbState->StatCollector.DecGRpcInFlight(); + // dbState->StatCollector.DecGRpcInFlightByHost(endpoint.GetEndpoint()); + + // if (NYdbGrpc::IsGRpcStatusGood(grpcStatus)) { + // std::multimap metadata; + + // for (const auto& [name, value] : ctx.GetServerInitialMetadata()) { + // metadata.emplace( + // std::string(name.begin(), name.end()), + // std::string(value.begin(), value.end())); + // } + // for (const auto& [name, value] : ctx.GetServerTrailingMetadata()) { + // metadata.emplace( + // std::string(name.begin(), name.end()), + // std::string(value.begin(), value.end())); + // } + + // auto resp = new TResult( + // std::move(response), + // std::move(grpcStatus), + // std::move(userResponseCb), + // this, + // std::move(context), + // endpoint.GetEndpoint(), + // std::move(metadata)); + + // EnqueueResponse(resp); + // } else { + // dbState->StatCollector.IncReqFailDueTransportError(); + // dbState->StatCollector.IncTransportErrorsByHost(endpoint.GetEndpoint()); + + // auto resp = new TGRpcErrorResponse( + // std::move(grpcStatus), + // std::move(userResponseCb), + // this, + // std::move(context), + // endpoint.GetEndpoint()); + + // dbState->EndpointPool.BanEndpoint(endpoint.GetEndpoint()); + + // EnqueueResponse(resp); + // } + // }; + + // serviceConnection->DoAdvancedRequest(*request, std::move(responseCbLow), rpc, meta, + // context.get()); + // }, dbState, requestSettings.PreferredEndpoint, requestSettings.EndpointPolicy); } template @@ -766,6 +788,186 @@ template const TLog& GetLog() const override; private: + template + void RunImpl( + std::conditional_t request, + TResponseCb&& userResponseCb, + TSimpleRpc rpc, + TDbDriverStatePtr dbState, + const TRpcRequestSettings& requestSettings, + std::shared_ptr context = nullptr) + { + using NYdbGrpc::TGrpcStatus; + using TConnection = std::unique_ptr>; + Y_ABORT_UNLESS(dbState); + + if (!TryCreateContext(context)) { + TPlainStatus status(EStatus::CLIENT_CANCELLED, "Client is stopped"); + userResponseCb(nullptr, TPlainStatus{status.Status, std::move(status.Issues)}); + return; + } + + if (dbState->StatCollector.IsCollecting()) { + std::weak_ptr weakState = dbState; + const auto startTime = TInstant::Now(); + userResponseCb = std::move([cb = std::move(userResponseCb), weakState, startTime](TResponse* response, TPlainStatus status) { + const auto resultSize = response ? response->ByteSizeLong() : 0; + cb(response, status); + + if (auto state = weakState.lock()) { + state->StatCollector.IncRequestLatency(TInstant::Now() - startTime); + state->StatCollector.IncResultSize(resultSize); + } + }); + } + + auto createMeta = [requestSettings, dbState](const TResponseCb& userResponseCb) -> std::optional { + TCallMeta meta; + meta.Timeout = requestSettings.ClientTimeout; + #ifndef YDB_GRPC_UNSECURE_AUTH + meta.CallCredentials = dbState->CallCredentials; + #else + if (requestSettings.UseAuth && dbState->CredentialsProvider && dbState->CredentialsProvider->IsValid()) { + try { + meta.Aux.push_back({ YDB_AUTH_TICKET_HEADER, GetAuthInfo(dbState) }); + } catch (const std::exception& e) { + userResponseCb( + nullptr, + TPlainStatus( + EStatus::CLIENT_UNAUTHENTICATED, + TStringBuilder() << "Can't get Authentication info from CredentialsProvider. " << e.what() + ) + ); + return std::nullopt; + } + } + #endif + if (!requestSettings.TraceId.empty()) { + meta.Aux.push_back({YDB_TRACE_ID_HEADER, requestSettings.TraceId}); + } + + if (!requestSettings.RequestType.empty()) { + meta.Aux.push_back({YDB_REQUEST_TYPE_HEADER, requestSettings.RequestType}); + } + + if (!dbState->Database.empty()) { + SetDatabaseHeader(meta, dbState->Database); + } + + static const std::string clientPid = GetClientPIDHeaderValue(); + + meta.Aux.push_back({YDB_SDK_BUILD_INFO_HEADER, CreateSDKBuildInfo()}); + meta.Aux.push_back({YDB_CLIENT_PID, clientPid}); + meta.Aux.insert(meta.Aux.end(), requestSettings.Header.begin(), requestSettings.Header.end()); + + return meta; + }; + + auto createResponseCbLow = [this, dbState]( + const TResponseCb& userResponseCb, + const TEndpointKey& endpoint, + std::shared_ptr context) -> NYdbGrpc::TAdvancedResponseCallback { + dbState->StatCollector.IncGRpcInFlight(); + dbState->StatCollector.IncGRpcInFlightByHost(endpoint.GetEndpoint()); + + NYdbGrpc::TAdvancedResponseCallback responseCbLow = + [this, context, userResponseCb = std::move(userResponseCb), endpoint, dbState] + (const grpc::ClientContext& ctx, TGrpcStatus&& grpcStatus, TResponse&& response) mutable -> void { + dbState->StatCollector.DecGRpcInFlight(); + dbState->StatCollector.DecGRpcInFlightByHost(endpoint.GetEndpoint()); + + if (NYdbGrpc::IsGRpcStatusGood(grpcStatus)) { + std::multimap metadata; + + for (const auto& [name, value] : ctx.GetServerInitialMetadata()) { + metadata.emplace( + std::string(name.begin(), name.end()), + std::string(value.begin(), value.end())); + } + for (const auto& [name, value] : ctx.GetServerTrailingMetadata()) { + metadata.emplace( + std::string(name.begin(), name.end()), + std::string(value.begin(), value.end())); + } + + auto resp = new TResult( + std::move(response), + std::move(grpcStatus), + std::move(userResponseCb), + this, + std::move(context), + endpoint.GetEndpoint(), + std::move(metadata)); + + EnqueueResponse(resp); + } else { + dbState->StatCollector.IncReqFailDueTransportError(); + dbState->StatCollector.IncTransportErrorsByHost(endpoint.GetEndpoint()); + + auto resp = new TGRpcErrorResponse( + std::move(grpcStatus), + std::move(userResponseCb), + this, + std::move(context), + endpoint.GetEndpoint()); + + dbState->EndpointPool.BanEndpoint(endpoint.GetEndpoint()); + + EnqueueResponse(resp); + } + }; + + return responseCbLow; + }; + + if constexpr (RequestOnArena) { + WithServiceConnection( + [request, userResponseCb = std::move(userResponseCb), rpc, requestSettings, context = std::move(context), dbState, + createMeta = std::move(createMeta), createResponseCbLow = std::move(createResponseCbLow)] + (TPlainStatus status, TConnection serviceConnection, TEndpointKey endpoint) mutable -> void { + if (!status.Ok()) { + userResponseCb( + nullptr, + std::move(status)); + return; + } + + std::optional metaOpt = createMeta(userResponseCb); + if (!metaOpt) { + return; + } + auto meta = std::move(*metaOpt); + auto responseCbLow = createResponseCbLow(userResponseCb, endpoint, context); + + serviceConnection->DoAdvancedRequest(*request, std::move(responseCbLow), rpc, meta, context.get()); + }, dbState, requestSettings.PreferredEndpoint, requestSettings.EndpointPolicy + ); + } + else { + WithServiceConnection( + [request, userResponseCb = std::move(userResponseCb), rpc, requestSettings, context = std::move(context), dbState, + createMeta = std::move(createMeta), createResponseCbLow = std::move(createResponseCbLow)] + (TPlainStatus status, TConnection serviceConnection, TEndpointKey endpoint) mutable -> void { + if (!status.Ok()) { + userResponseCb( + nullptr, + std::move(status)); + return; + } + + std::optional metaOpt = createMeta(userResponseCb); + if (!metaOpt) { + return; + } + auto meta = std::move(*metaOpt); + auto responseCbLow = createResponseCbLow(userResponseCb, endpoint, context); + + serviceConnection->DoAdvancedRequest(std::move(request), std::move(responseCbLow), rpc, meta, context.get()); + }, dbState, requestSettings.PreferredEndpoint, requestSettings.EndpointPolicy + ); + } + } + template void RunDeferredImpl( std::conditional_t request, From 776c2f82221482c10edecc0b2722bc804dfef0af Mon Sep 17 00:00:00 2001 From: Vladislav Artiukhov Date: Sat, 14 Jun 2025 13:53:13 +0000 Subject: [PATCH 31/35] fix: return arena-allocated proto model when present in TValue::TImpl --- ydb/public/sdk/cpp/src/client/value/value.cpp | 21 +++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) diff --git a/ydb/public/sdk/cpp/src/client/value/value.cpp b/ydb/public/sdk/cpp/src/client/value/value.cpp index 94e16990b310..34b489cd7ec6 100644 --- a/ydb/public/sdk/cpp/src/client/value/value.cpp +++ b/ydb/public/sdk/cpp/src/client/value/value.cpp @@ -1069,7 +1069,20 @@ class TValue::TImpl { , ProtoValue_{} , ArenaAllocatedValueProto_(arenaAllocatedValueProto) {} + const Ydb::Value& GetProto() const { + return ArenaAllocatedValueProto_ ? *ArenaAllocatedValueProto_ : ProtoValue_; + } + + Ydb::Value& GetProto() { + return ArenaAllocatedValueProto_ ? *ArenaAllocatedValueProto_ : ProtoValue_; + } + + Ydb::Value&& ExtractProto() && { + return std::move(ArenaAllocatedValueProto_ ? *ArenaAllocatedValueProto_ : ProtoValue_); + } + TType Type_; +private: Ydb::Value ProtoValue_; Ydb::Value* ArenaAllocatedValueProto_; }; @@ -1094,15 +1107,15 @@ TType & TValue::GetType() { } const Ydb::Value& TValue::GetProto() const { - return Impl_->ProtoValue_; + return Impl_->GetProto(); } Ydb::Value& TValue::GetProto() { - return Impl_->ProtoValue_; + return Impl_->GetProto(); } Ydb::Value&& TValue::ExtractProto() && { - return std::move(Impl_->ProtoValue_); + return std::move(std::move(*Impl_).ExtractProto()); } //////////////////////////////////////////////////////////////////////////////// @@ -1129,7 +1142,7 @@ class TValueParser::TImpl { : Value_(value.Impl_) , TypeParser_(value.GetType()) { - Reset(Value_->ProtoValue_); + Reset(Value_->GetProto()); } TImpl(const TType& type) From 2596df366e08045b10df493ff2a22d90f2f5f0a4 Mon Sep 17 00:00:00 2001 From: Vladislav Artiukhov Date: Sat, 14 Jun 2025 14:01:46 +0000 Subject: [PATCH 32/35] feat: set ZSTD compression in Apache Arrow codec --- ydb/public/lib/ydb_cli/import/import.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/ydb/public/lib/ydb_cli/import/import.cpp b/ydb/public/lib/ydb_cli/import/import.cpp index b001a51ec8bf..986cbc2e1dd7 100644 --- a/ydb/public/lib/ydb_cli/import/import.cpp +++ b/ydb/public/lib/ydb_cli/import/import.cpp @@ -1215,8 +1215,7 @@ TStatus TImportFileClient::TImpl::UpsertCsv(IInputStream& input, }()); auto writeOptions = arrow::ipc::IpcWriteOptions::Defaults(); - // TODO: what codec to use? ZSTD is used in ydb_workload_import.cpp; use it here as well? - constexpr auto codecType = arrow::Compression::type::UNCOMPRESSED; + constexpr auto codecType = arrow::Compression::type::ZSTD; writeOptions.codec = *arrow::util::Codec::Create(codecType); // determines if we should fallback to YDB Types (TValue with Protobuf arena) when Apache Arrow failed. From 401e641168cdaa974257f845e8a9de77882910aa Mon Sep 17 00:00:00 2001 From: Vladislav Artiukhov Date: Sun, 15 Jun 2025 13:53:45 +0000 Subject: [PATCH 33/35] refactor: remove addressed TODO --- ydb/public/sdk/cpp/src/client/table/impl/table_client.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/ydb/public/sdk/cpp/src/client/table/impl/table_client.cpp b/ydb/public/sdk/cpp/src/client/table/impl/table_client.cpp index fcca6eb36bd3..3bb668a5c1dd 100644 --- a/ydb/public/sdk/cpp/src/client/table/impl/table_client.cpp +++ b/ydb/public/sdk/cpp/src/client/table/impl/table_client.cpp @@ -1036,7 +1036,6 @@ TAsyncBulkUpsertResult TTableClient::TImpl::BulkUpsertUnretryableArenaAllocatedU promise.SetValue(std::move(val)); }; - // TODO: make sure it is correct; see what server receives // don't give ownership of request to the function Connections_->RunDeferredOnArena( request, From 67c310f56152560202dc6cd1072f47e0df0548a0 Mon Sep 17 00:00:00 2001 From: Vladislav Artiukhov Date: Sun, 15 Jun 2025 13:57:22 +0000 Subject: [PATCH 34/35] refactor: remove printings from import.cpp --- ydb/public/lib/ydb_cli/import/import.cpp | 19 ------------------- 1 file changed, 19 deletions(-) diff --git a/ydb/public/lib/ydb_cli/import/import.cpp b/ydb/public/lib/ydb_cli/import/import.cpp index 986cbc2e1dd7..645069bca3ed 100644 --- a/ydb/public/lib/ydb_cli/import/import.cpp +++ b/ydb/public/lib/ydb_cli/import/import.cpp @@ -862,15 +862,6 @@ TStatus TImportFileClient::TImpl::Import(const TVector& filePaths, cons std::cerr << "Elapsed: " << std::setprecision(3) << duration.SecondsFloat() << " sec. Total read size: " << PrettifyBytes(TotalBytesRead) << ". Average processing speed: " << PrettifyBytes((double)TotalBytesRead / duration.SecondsFloat()) << "/s." << std::endl; - - // print strcture for the pipeline: - // elapsed_time_sec: [float] - // total_read_size_byte: [integer] - // avg_processing_speed_bytes_per_sec: [integer] - std::cout << "elapsed_time_sec: " << std::setprecision(5) << duration.SecondsFloat() << std::endl; - std::cout << "total_read_size_byte: " << TotalBytesRead << std::endl; - std::cout << "avg_processing_speed_MiB_per_sec: " << std::setprecision(5) - << ((double)TotalBytesRead / duration.SecondsFloat()) / (1024 * 1024) << std::endl; } // Removing all progress files that were a part of this import @@ -1157,11 +1148,6 @@ TStatus TImportFileClient::TImpl::UpsertCsv(IInputStream& input, ui64 skippedBytes = 0; ui64 nextSkipBorder = VerboseModeStepSize; - std::cerr << "Settings.Header_: " << Settings.Header_ << std::endl; - std::cerr << "Settings.SkipRows_: " << Settings.SkipRows_ << std::endl; - std::cerr << "rowsToSkip: " << rowsToSkip << std::endl; - std::cerr << "row: " << row << std::endl; - TString line; std::vector inFlightRequests; std::vector buffer; @@ -1198,11 +1184,6 @@ TStatus TImportFileClient::TImpl::UpsertCsv(IInputStream& input, // Note: table = dbPath (path to the table on the server) auto columns = DbTableInfo->GetTableColumns(); - std::cerr << "columns: " << columns.size() << std::endl; - for (const auto& column : columns) { - std::cerr << "column: " << column.Name << " " << column.Type.ToString() << std::endl; - } - const Ydb::Formats::CsvSettings csvSettings = ([this]() { Ydb::Formats::CsvSettings settings; settings.set_delimiter(Settings.Delimiter_); From a0dd9dee685f69a5f1565165897221bc0a71dadd Mon Sep 17 00:00:00 2001 From: Vladislav Artiukhov Date: Sun, 15 Jun 2025 14:37:10 +0000 Subject: [PATCH 35/35] refactor: remove commented code and redundant diffs --- ydb/public/lib/ydb_cli/common/csv_parser.cpp | 3 - ydb/public/lib/ydb_cli/common/csv_parser.h | 1 - ydb/public/lib/ydb_cli/import/import.cpp | 17 +- .../include/ydb-cpp-sdk/client/value/value.h | 2 - .../grpc_connections/grpc_connections.h | 259 ------------------ .../impl/ydb_internal/make_request/make.h | 2 +- .../src/client/table/impl/table_client.cpp | 4 - 7 files changed, 3 insertions(+), 285 deletions(-) diff --git a/ydb/public/lib/ydb_cli/common/csv_parser.cpp b/ydb/public/lib/ydb_cli/common/csv_parser.cpp index 26beb8688a00..8036d5b5358b 100644 --- a/ydb/public/lib/ydb_cli/common/csv_parser.cpp +++ b/ydb/public/lib/ydb_cli/common/csv_parser.cpp @@ -414,7 +414,6 @@ class TCsvToYdbConverter { TValueBuilder Builder; }; - TCsvParseException FormatError(const std::exception& inputError, const TCsvParser::TParseMetadata& meta, const std::optional& columnName = {}) { @@ -580,8 +579,6 @@ TValue TCsvParser::BuildList(const std::vector& lines, const TString& f return TValue(ResultListType.value(), std::move(valueProto)); } - - TValue TCsvParser::BuildListOnArena( const std::vector& lines, const TString& filename, diff --git a/ydb/public/lib/ydb_cli/common/csv_parser.h b/ydb/public/lib/ydb_cli/common/csv_parser.h index a2355b991bf4..a344a27d1b43 100644 --- a/ydb/public/lib/ydb_cli/common/csv_parser.h +++ b/ydb/public/lib/ydb_cli/common/csv_parser.h @@ -2,7 +2,6 @@ #include #include -#include #include diff --git a/ydb/public/lib/ydb_cli/import/import.cpp b/ydb/public/lib/ydb_cli/import/import.cpp index 645069bca3ed..4a509be3925f 100644 --- a/ydb/public/lib/ydb_cli/import/import.cpp +++ b/ydb/public/lib/ydb_cli/import/import.cpp @@ -716,7 +716,6 @@ TStatus TImportFileClient::TImpl::Import(const TVector& filePaths, cons for (size_t fileOrderNumber = 0; fileOrderNumber < filePathsSize; ++fileOrderNumber) { const auto& filePath = filePaths[fileOrderNumber]; std::shared_ptr progressFile = LoadOrStartImportProgress(filePath); - auto func = [&, fileOrderNumber, progressFile, this] { std::unique_ptr fileInput; std::optional fileSizeHint; @@ -978,7 +977,6 @@ inline TAsyncStatus TImportFileClient::TImpl::UpsertTValueBuffer(const TString& dbPath, std::function&& buildFunc) { // For the first attempt values are built before acquiring request inflight semaphore std::optional prebuiltValue = buildFunc(); - auto retryFunc = [this, &dbPath, buildFunc = std::move(buildFunc), prebuiltValue = std::move(prebuiltValue)] (NYdb::NTable::TTableClient& tableClient) mutable -> TAsyncStatus { auto buildTValueAndSendRequest = [this, &buildFunc, &dbPath, &tableClient, &prebuiltValue]() { @@ -1019,9 +1017,7 @@ TAsyncStatus TImportFileClient::TImpl::UpsertTValueBuffer(const TString& dbPath, }); } - -inline -TAsyncStatus TImportFileClient::TImpl::UpsertTValueBufferParquet( +inline TAsyncStatus TImportFileClient::TImpl::UpsertTValueBufferParquet( const TString& dbPath, std::shared_ptr batch, const arrow::ipc::IpcWriteOptions& writeOptions @@ -1060,10 +1056,7 @@ TAsyncStatus TImportFileClient::TImpl::UpsertTValueBufferParquet( }); } - - -inline -TAsyncStatus TImportFileClient::TImpl::UpsertTValueBufferOnArena( +inline TAsyncStatus TImportFileClient::TImpl::UpsertTValueBufferOnArena( const TString& dbPath, std::function&& buildFunc) { auto arena = std::make_shared(); @@ -1115,11 +1108,6 @@ TAsyncStatus TImportFileClient::TImpl::UpsertTValueBufferOnArena( }); } - - - - - TStatus TImportFileClient::TImpl::UpsertCsv(IInputStream& input, const TString& dbPath, const TString& filePath, @@ -1275,7 +1263,6 @@ TStatus TImportFileClient::TImpl::UpsertCsv(IInputStream& input, } }; - // TODO: create arena here and pass it to UpsertTValueBufferOnArena? UpsertTValueBufferOnArena(dbPath, std::move(buildOnArenaFunc)) .Apply([&, batchStatus](const TAsyncStatus& asyncStatus) { jobInflightManager->ReleaseJob(); diff --git a/ydb/public/sdk/cpp/include/ydb-cpp-sdk/client/value/value.h b/ydb/public/sdk/cpp/include/ydb-cpp-sdk/client/value/value.h index a142779d8633..4c512390dc41 100644 --- a/ydb/public/sdk/cpp/include/ydb-cpp-sdk/client/value/value.h +++ b/ydb/public/sdk/cpp/include/ydb-cpp-sdk/client/value/value.h @@ -280,13 +280,11 @@ class TValue { Ydb::Value& GetProto(); Ydb::Value&& ExtractProto() &&; - private: class TImpl; std::shared_ptr Impl_; }; - class TValueParser : public TMoveOnly { friend class TResultSetParser; public: diff --git a/ydb/public/sdk/cpp/src/client/impl/ydb_internal/grpc_connections/grpc_connections.h b/ydb/public/sdk/cpp/src/client/impl/ydb_internal/grpc_connections/grpc_connections.h index 123472509e17..0a608fe85171 100644 --- a/ydb/public/sdk/cpp/src/client/impl/ydb_internal/grpc_connections/grpc_connections.h +++ b/ydb/public/sdk/cpp/src/client/impl/ydb_internal/grpc_connections/grpc_connections.h @@ -140,7 +140,6 @@ class TGRpcConnectionsImpl TRequest, TResponse>::TAsyncRequest; - template void Run( TRequest&& request, @@ -158,135 +157,8 @@ class TGRpcConnectionsImpl requestSettings, std::move(context) ); - - // using NYdbGrpc::TGrpcStatus; - // using TConnection = std::unique_ptr>; - // Y_ABORT_UNLESS(dbState); - - // if (!TryCreateContext(context)) { - // TPlainStatus status(EStatus::CLIENT_CANCELLED, "Client is stopped"); - // userResponseCb(nullptr, TPlainStatus{status.Status, std::move(status.Issues)}); - // return; - // } - - // if (dbState->StatCollector.IsCollecting()) { - // std::weak_ptr weakState = dbState; - // const auto startTime = TInstant::Now(); - // userResponseCb = std::move([cb = std::move(userResponseCb), weakState, startTime](TResponse* response, TPlainStatus status) { - // const auto resultSize = response ? response->ByteSizeLong() : 0; - // cb(response, status); - - // if (auto state = weakState.lock()) { - // state->StatCollector.IncRequestLatency(TInstant::Now() - startTime); - // state->StatCollector.IncResultSize(resultSize); - // } - // }); - // } - - // WithServiceConnection( - // [this, request = std::forward(request), userResponseCb = std::move(userResponseCb), rpc, requestSettings, context = std::move(context), dbState] - // (TPlainStatus status, TConnection serviceConnection, TEndpointKey endpoint) mutable -> void { - // if (!status.Ok()) { - // userResponseCb( - // nullptr, - // std::move(status)); - // return; - // } - - // TCallMeta meta; - // meta.Timeout = requestSettings.ClientTimeout; - // #ifndef YDB_GRPC_UNSECURE_AUTH - // meta.CallCredentials = dbState->CallCredentials; - // #else - // if (requestSettings.UseAuth && dbState->CredentialsProvider && dbState->CredentialsProvider->IsValid()) { - // try { - // meta.Aux.push_back({ YDB_AUTH_TICKET_HEADER, GetAuthInfo(dbState) }); - // } catch (const std::exception& e) { - // userResponseCb( - // nullptr, - // TPlainStatus( - // EStatus::CLIENT_UNAUTHENTICATED, - // TStringBuilder() << "Can't get Authentication info from CredentialsProvider. " << e.what() - // ) - // ); - // return; - // } - // } - // #endif - // if (!requestSettings.TraceId.empty()) { - // meta.Aux.push_back({YDB_TRACE_ID_HEADER, requestSettings.TraceId}); - // } - - // if (!requestSettings.RequestType.empty()) { - // meta.Aux.push_back({YDB_REQUEST_TYPE_HEADER, requestSettings.RequestType}); - // } - - // if (!dbState->Database.empty()) { - // SetDatabaseHeader(meta, dbState->Database); - // } - - // static const std::string clientPid = GetClientPIDHeaderValue(); - - // meta.Aux.push_back({YDB_SDK_BUILD_INFO_HEADER, CreateSDKBuildInfo()}); - // meta.Aux.push_back({YDB_CLIENT_PID, clientPid}); - // meta.Aux.insert(meta.Aux.end(), requestSettings.Header.begin(), requestSettings.Header.end()); - - // dbState->StatCollector.IncGRpcInFlight(); - // dbState->StatCollector.IncGRpcInFlightByHost(endpoint.GetEndpoint()); - - // NYdbGrpc::TAdvancedResponseCallback responseCbLow = - // [this, context, userResponseCb = std::move(userResponseCb), endpoint, dbState] - // (const grpc::ClientContext& ctx, TGrpcStatus&& grpcStatus, TResponse&& response) mutable -> void { - // dbState->StatCollector.DecGRpcInFlight(); - // dbState->StatCollector.DecGRpcInFlightByHost(endpoint.GetEndpoint()); - - // if (NYdbGrpc::IsGRpcStatusGood(grpcStatus)) { - // std::multimap metadata; - - // for (const auto& [name, value] : ctx.GetServerInitialMetadata()) { - // metadata.emplace( - // std::string(name.begin(), name.end()), - // std::string(value.begin(), value.end())); - // } - // for (const auto& [name, value] : ctx.GetServerTrailingMetadata()) { - // metadata.emplace( - // std::string(name.begin(), name.end()), - // std::string(value.begin(), value.end())); - // } - - // auto resp = new TResult( - // std::move(response), - // std::move(grpcStatus), - // std::move(userResponseCb), - // this, - // std::move(context), - // endpoint.GetEndpoint(), - // std::move(metadata)); - - // EnqueueResponse(resp); - // } else { - // dbState->StatCollector.IncReqFailDueTransportError(); - // dbState->StatCollector.IncTransportErrorsByHost(endpoint.GetEndpoint()); - - // auto resp = new TGRpcErrorResponse( - // std::move(grpcStatus), - // std::move(userResponseCb), - // this, - // std::move(context), - // endpoint.GetEndpoint()); - - // dbState->EndpointPool.BanEndpoint(endpoint.GetEndpoint()); - - // EnqueueResponse(resp); - // } - // }; - - // serviceConnection->DoAdvancedRequest(std::move(request), std::move(responseCbLow), rpc, meta, - // context.get()); - // }, dbState, requestSettings.PreferredEndpoint, requestSettings.EndpointPolicy); } - template void RunOnArena( TRequest* request, @@ -304,134 +176,6 @@ class TGRpcConnectionsImpl requestSettings, std::move(context) ); - - // using NYdbGrpc::TGrpcStatus; - // using TConnection = std::unique_ptr>; - // Y_ABORT_UNLESS(dbState); - - // if (!TryCreateContext(context)) { - // TPlainStatus status(EStatus::CLIENT_CANCELLED, "Client is stopped"); - // userResponseCb(nullptr, TPlainStatus{status.Status, std::move(status.Issues)}); - // return; - // } - - // if (dbState->StatCollector.IsCollecting()) { - // std::weak_ptr weakState = dbState; - // const auto startTime = TInstant::Now(); - // userResponseCb = std::move([cb = std::move(userResponseCb), weakState, startTime](TResponse* response, TPlainStatus status) { - // const auto resultSize = response ? response->ByteSizeLong() : 0; - // cb(response, status); - - // if (auto state = weakState.lock()) { - // state->StatCollector.IncRequestLatency(TInstant::Now() - startTime); - // state->StatCollector.IncResultSize(resultSize); - // } - // }); - // } - - - - // WithServiceConnection( - // [this, request, userResponseCb = std::move(userResponseCb), rpc, requestSettings, context = std::move(context), dbState] - // (TPlainStatus status, TConnection serviceConnection, TEndpointKey endpoint) mutable -> void { - // if (!status.Ok()) { - // userResponseCb( - // nullptr, - // std::move(status)); - // return; - // } - - // TCallMeta meta; - // meta.Timeout = requestSettings.ClientTimeout; - // #ifndef YDB_GRPC_UNSECURE_AUTH - // meta.CallCredentials = dbState->CallCredentials; - // #else - // if (requestSettings.UseAuth && dbState->CredentialsProvider && dbState->CredentialsProvider->IsValid()) { - // try { - // meta.Aux.push_back({ YDB_AUTH_TICKET_HEADER, GetAuthInfo(dbState) }); - // } catch (const std::exception& e) { - // userResponseCb( - // nullptr, - // TPlainStatus( - // EStatus::CLIENT_UNAUTHENTICATED, - // TStringBuilder() << "Can't get Authentication info from CredentialsProvider. " << e.what() - // ) - // ); - // return; - // } - // } - // #endif - // if (!requestSettings.TraceId.empty()) { - // meta.Aux.push_back({YDB_TRACE_ID_HEADER, requestSettings.TraceId}); - // } - - // if (!requestSettings.RequestType.empty()) { - // meta.Aux.push_back({YDB_REQUEST_TYPE_HEADER, requestSettings.RequestType}); - // } - - // if (!dbState->Database.empty()) { - // SetDatabaseHeader(meta, dbState->Database); - // } - - // static const std::string clientPid = GetClientPIDHeaderValue(); - - // meta.Aux.push_back({YDB_SDK_BUILD_INFO_HEADER, CreateSDKBuildInfo()}); - // meta.Aux.push_back({YDB_CLIENT_PID, clientPid}); - // meta.Aux.insert(meta.Aux.end(), requestSettings.Header.begin(), requestSettings.Header.end()); - - // dbState->StatCollector.IncGRpcInFlight(); - // dbState->StatCollector.IncGRpcInFlightByHost(endpoint.GetEndpoint()); - - // NYdbGrpc::TAdvancedResponseCallback responseCbLow = - // [this, context, userResponseCb = std::move(userResponseCb), endpoint, dbState] - // (const grpc::ClientContext& ctx, TGrpcStatus&& grpcStatus, TResponse&& response) mutable -> void { - // dbState->StatCollector.DecGRpcInFlight(); - // dbState->StatCollector.DecGRpcInFlightByHost(endpoint.GetEndpoint()); - - // if (NYdbGrpc::IsGRpcStatusGood(grpcStatus)) { - // std::multimap metadata; - - // for (const auto& [name, value] : ctx.GetServerInitialMetadata()) { - // metadata.emplace( - // std::string(name.begin(), name.end()), - // std::string(value.begin(), value.end())); - // } - // for (const auto& [name, value] : ctx.GetServerTrailingMetadata()) { - // metadata.emplace( - // std::string(name.begin(), name.end()), - // std::string(value.begin(), value.end())); - // } - - // auto resp = new TResult( - // std::move(response), - // std::move(grpcStatus), - // std::move(userResponseCb), - // this, - // std::move(context), - // endpoint.GetEndpoint(), - // std::move(metadata)); - - // EnqueueResponse(resp); - // } else { - // dbState->StatCollector.IncReqFailDueTransportError(); - // dbState->StatCollector.IncTransportErrorsByHost(endpoint.GetEndpoint()); - - // auto resp = new TGRpcErrorResponse( - // std::move(grpcStatus), - // std::move(userResponseCb), - // this, - // std::move(context), - // endpoint.GetEndpoint()); - - // dbState->EndpointPool.BanEndpoint(endpoint.GetEndpoint()); - - // EnqueueResponse(resp); - // } - // }; - - // serviceConnection->DoAdvancedRequest(*request, std::move(responseCbLow), rpc, meta, - // context.get()); - // }, dbState, requestSettings.PreferredEndpoint, requestSettings.EndpointPolicy); } template @@ -564,9 +308,6 @@ template context); } - - - template class TStream> using TStreamRpc = typename TStream< diff --git a/ydb/public/sdk/cpp/src/client/impl/ydb_internal/make_request/make.h b/ydb/public/sdk/cpp/src/client/impl/ydb_internal/make_request/make.h index 392fd35cf793..742e6a8a53e0 100644 --- a/ydb/public/sdk/cpp/src/client/impl/ydb_internal/make_request/make.h +++ b/ydb/public/sdk/cpp/src/client/impl/ydb_internal/make_request/make.h @@ -49,12 +49,12 @@ TProtoRequest MakeOperationRequest(const TRequestSettings& settings) { template TProtoRequest* MakeRequestOnArena(google::protobuf::Arena* arena) { - assert(arena != nullptr); return google::protobuf::Arena::CreateMessage(arena); } template TProtoRequest* MakeOperationRequestOnArena(const TRequestSettings& settings, google::protobuf::Arena* arena) { + Y_ASSERT(arena != nullptr); auto request = MakeRequestOnArena(arena); FillOperationParams(settings, *request); return request; diff --git a/ydb/public/sdk/cpp/src/client/table/impl/table_client.cpp b/ydb/public/sdk/cpp/src/client/table/impl/table_client.cpp index 3bb668a5c1dd..7bc8b9d98228 100644 --- a/ydb/public/sdk/cpp/src/client/table/impl/table_client.cpp +++ b/ydb/public/sdk/cpp/src/client/table/impl/table_client.cpp @@ -1015,7 +1015,6 @@ TAsyncBulkUpsertResult TTableClient::TImpl::BulkUpsertUnretryableUnsafe(const st return promise.GetFuture(); } - TAsyncBulkUpsertResult TTableClient::TImpl::BulkUpsertUnretryableArenaAllocatedUnsafe( const std::string& table, TValue&& rows, @@ -1048,8 +1047,6 @@ TAsyncBulkUpsertResult TTableClient::TImpl::BulkUpsertUnretryableArenaAllocatedU return promise.GetFuture(); } - - TAsyncBulkUpsertResult TTableClient::TImpl::BulkUpsert(const std::string& table, TValue&& rows, const TBulkUpsertSettings& settings) { auto request = MakeOperationRequest(settings); request.set_table(TStringType{table}); @@ -1076,7 +1073,6 @@ TAsyncBulkUpsertResult TTableClient::TImpl::BulkUpsert(const std::string& table, return promise.GetFuture(); } - TAsyncBulkUpsertResult TTableClient::TImpl::BulkUpsert(const std::string& table, EDataFormat format, const std::string& data, const std::string& schema, const TBulkUpsertSettings& settings) {