From cd684ed39e35050c94de99c60a59947000f5751a Mon Sep 17 00:00:00 2001 From: Vladislav Artiukhov Date: Fri, 18 Apr 2025 09:16:28 +0000 Subject: [PATCH 01/41] feat: allocate Ydb::Value on arena if provided in BuildList --- ydb/public/lib/ydb_cli/common/csv_parser.cpp | 94 +++++++++++++------ ydb/public/lib/ydb_cli/common/csv_parser.h | 15 ++- .../lib/ydb_cli/common/csv_parser_ut.cpp | 6 +- ydb/public/lib/ydb_cli/import/import.cpp | 6 +- 4 files changed, 86 insertions(+), 35 deletions(-) diff --git a/ydb/public/lib/ydb_cli/common/csv_parser.cpp b/ydb/public/lib/ydb_cli/common/csv_parser.cpp index 76d0dce81e3f..7815056fceb5 100644 --- a/ydb/public/lib/ydb_cli/common/csv_parser.cpp +++ b/ydb/public/lib/ydb_cli/common/csv_parser.cpp @@ -165,7 +165,7 @@ class TCsvToYdbConverter { } case EPrimitiveType::Interval64: Builder.Interval64(GetArithmetic(token)); - break; + break; case EPrimitiveType::TzDate: Builder.TzDate(token); break; @@ -441,7 +441,7 @@ TStringBuf Consume(NCsvFormat::CsvSplitter& splitter, TCsvParser::TCsvParser(TString&& headerRow, const char delimeter, const std::optional& nullValue, const std::map* destinationTypes, - const std::map* paramSources) + const std::map* paramSources) : HeaderRow(std::move(headerRow)) , Delimeter(delimeter) , NullValue(nullValue) @@ -454,7 +454,7 @@ TCsvParser::TCsvParser(TString&& headerRow, const char delimeter, const std::opt TCsvParser::TCsvParser(TVector&& header, const char delimeter, const std::optional& nullValue, const std::map* destinationTypes, - const std::map* paramSources) + const std::map* paramSources) : Header(std::move(header)) , Delimeter(delimeter) , NullValue(nullValue) @@ -529,41 +529,76 @@ void TCsvParser::BuildValue(TString& data, TValueBuilder& builder, const TType& builder.EndStruct(); } -TValue TCsvParser::BuildList(std::vector& lines, const TString& filename, std::optional row) const { +TValue TCsvParser::BuildList(const std::vector& lines, const TString& filename, std::optional row, google::protobuf::Arena* arena) const { std::vector> columnTypeParsers; columnTypeParsers.reserve(ResultColumnCount); for (const TType* type : ResultLineTypesSorted) { columnTypeParsers.push_back(std::make_unique(*type)); } - - Ydb::Value listValue; - auto* listItems = listValue.mutable_items(); - listItems->Reserve(lines.size()); - for (auto& line : lines) { - NCsvFormat::CsvSplitter splitter(line, Delimeter); - TParseMetadata meta {row, filename}; - auto* structItems = listItems->Add()->mutable_items(); - structItems->Reserve(ResultColumnCount); - auto headerIt = Header.cbegin(); - auto skipIt = SkipBitMap.begin(); - auto typeParserIt = columnTypeParsers.begin(); - do { - if (headerIt == Header.cend()) { // SkipBitMap has same size as Header - throw FormatError(yexception() << "Header contains less fields than data. Header: \"" << HeaderRow << "\", data: \"" << line << "\"", meta); + + if (arena) { + // Arena allocation path + Ydb::Value* valuePtr = google::protobuf::Arena::CreateMessage(arena); + auto* listItems = valuePtr->mutable_items(); + listItems->Reserve(lines.size()); + + for (const auto& line : lines) { + ProcessCsvLine(line, listItems, columnTypeParsers, row, filename); + if (row.has_value()) { + ++row.value(); } - TStringBuf nextField = Consume(splitter, meta, *headerIt); - if (!*skipIt) { - *structItems->Add() = FieldToValue(*typeParserIt->get(), nextField, NullValue, meta, *headerIt).GetProto(); - ++typeParserIt; + } + + // Return a TValue that references the arena-allocated message + return TValue(ResultListType.value(), std::move(*valuePtr)); + } else { + // Original path with local value object + Ydb::Value valueProto; + auto* listItems = valueProto.mutable_items(); + listItems->Reserve(lines.size()); + + for (const auto& line : lines) { + ProcessCsvLine(line, listItems, columnTypeParsers, row, filename); + if (row.has_value()) { + ++row.value(); } - ++headerIt; - ++skipIt; - } while (splitter.Step()); - if (row.has_value()) { - ++row.value(); } + + // Return a TValue that takes ownership via move + return TValue(ResultListType.value(), std::move(valueProto)); } - return TValue(ResultListType.value(), std::move(listValue)); +} + +// Helper method to process a single CSV line +void TCsvParser::ProcessCsvLine( + const TString& line, + google::protobuf::RepeatedPtrField* listItems, + const std::vector>& columnTypeParsers, + std::optional currentRow, + const TString& filename) const { + + NCsvFormat::CsvSplitter splitter(line, Delimeter); + auto* structItems = listItems->Add()->mutable_items(); + structItems->Reserve(ResultColumnCount); + + const TParseMetadata meta {currentRow, filename}; + + auto headerIt = Header.cbegin(); + auto skipIt = SkipBitMap.begin(); + auto typeParserIt = columnTypeParsers.begin(); + + do { + if (headerIt == Header.cend()) { // SkipBitMap has same size as Header + throw FormatError(yexception() << "Header contains less fields than data. Header: \"" << HeaderRow << "\", data: \"" << line << "\"", meta); + } + TStringBuf nextField = Consume(splitter, meta, *headerIt); + if (!*skipIt) { + *structItems->Add() = FieldToValue(*typeParserIt->get(), nextField, NullValue, meta, *headerIt).GetProto(); + ++typeParserIt; + } + ++headerIt; + ++skipIt; + } while (splitter.Step()); } void TCsvParser::BuildLineType() { @@ -609,3 +644,4 @@ const TVector& TCsvParser::GetHeader() { } } + diff --git a/ydb/public/lib/ydb_cli/common/csv_parser.h b/ydb/public/lib/ydb_cli/common/csv_parser.h index 05c3def83d9c..0f2bda2cb06a 100644 --- a/ydb/public/lib/ydb_cli/common/csv_parser.h +++ b/ydb/public/lib/ydb_cli/common/csv_parser.h @@ -1,6 +1,8 @@ #pragma once #include +#include +#include #include @@ -35,8 +37,9 @@ class TCsvParser { void BuildParams(TString& data, TParamsBuilder& builder, const TParseMetadata& meta) const; void BuildValue(TString& data, TValueBuilder& builder, const TType& type, const TParseMetadata& meta) const; - TValue BuildList(std::vector& lines, const TString& filename, - std::optional row = std::nullopt) const; + TValue BuildList(const std::vector& lines, const TString& filename, + std::optional row = std::nullopt, + google::protobuf::Arena* arena = nullptr) const; void BuildLineType(); const TVector& GetHeader(); @@ -60,6 +63,14 @@ class TCsvParser { // Types of columns in a single row in resulting TValue. // Column order according to the header, though can have less elements than the Header std::vector ResultLineTypesSorted; + + // Helper method to process a single line of CSV data + void ProcessCsvLine( + const TString& line, + google::protobuf::RepeatedPtrField* listItems, + const std::vector>& columnTypeParsers, + std::optional currentRow, + const TString& filename) const; }; } diff --git a/ydb/public/lib/ydb_cli/common/csv_parser_ut.cpp b/ydb/public/lib/ydb_cli/common/csv_parser_ut.cpp index eccfacbf92ce..6cfaa4db3603 100644 --- a/ydb/public/lib/ydb_cli/common/csv_parser_ut.cpp +++ b/ydb/public/lib/ydb_cli/common/csv_parser_ut.cpp @@ -54,7 +54,7 @@ Y_UNIT_TEST_SUITE(YdbCliCsvParserTests) { TCsvParser parser(std::move(header), ',', "", &columnTypes, nullptr); parser.BuildLineType(); - TValue builtResult = parser.BuildList(data, "testFile.csv", 0); + TValue builtResult = parser.BuildList(data, "testFile.csv", 0, nullptr); AssertValuesEqual(builtResult, estimatedResult); } @@ -317,7 +317,7 @@ Y_UNIT_TEST_SUITE(YdbCliCsvParserTests) { {"col2", TTypeBuilder().BeginOptional().Primitive(EPrimitiveType::Int64).EndOptional().Build()}, {"col3", TTypeBuilder().Primitive(EPrimitiveType::Bool).Build()}, }; - + TString csvHeader = "col4,col3,col5,col1,col6"; std::vector data = { "col4 unused value,true,col5 unused value,col1 value,col6 unused value" @@ -325,7 +325,7 @@ Y_UNIT_TEST_SUITE(YdbCliCsvParserTests) { TCsvParser parser(std::move(csvHeader), ',', "", &tableColumnTypes, nullptr); parser.BuildLineType(); - TValue builtResult = parser.BuildList(data, "testFile.csv", 0); + TValue builtResult = parser.BuildList(data, "testFile.csv", 0, nullptr); TValue expexctedResult = TValueBuilder().BeginList() .AddListItem().BeginStruct() diff --git a/ydb/public/lib/ydb_cli/import/import.cpp b/ydb/public/lib/ydb_cli/import/import.cpp index beb067c7e13f..4fb83f189eaf 100644 --- a/ydb/public/lib/ydb_cli/import/import.cpp +++ b/ydb/public/lib/ydb_cli/import/import.cpp @@ -11,6 +11,8 @@ #include #include +#include + #include #include #include @@ -50,6 +52,7 @@ #include #endif +#include namespace NYdb { namespace NConsoleClient { @@ -998,6 +1001,7 @@ TStatus TImportFileClient::TImpl::UpsertCsv(IInputStream& input, throw; } }; + UpsertTValueBuffer(dbPath, std::move(buildFunc)) .Apply([&, batchStatus](const TAsyncStatus& asyncStatus) { jobInflightManager->ReleaseJob(); @@ -1143,7 +1147,7 @@ TStatus TImportFileClient::TImpl::UpsertCsvByBlocks(const TString& filePath, auto upsertCsvFunc = [&](std::vector&& buffer) { auto buildFunc = [&jobsInflight, &parser, buffer = std::move(buffer), &filePath, this]() mutable { try { - return parser.BuildList(buffer, filePath); + return parser.BuildList(buffer, filePath, std::nullopt); } catch (const std::exception& e) { if (!Failed.exchange(true)) { ErrorStatus = MakeHolder(MakeStatus(EStatus::INTERNAL_ERROR, e.what())); From 5ccf756a76db55deb95b467169999242fe0e5a3e Mon Sep 17 00:00:00 2001 From: Vladislav Artiukhov Date: Fri, 18 Apr 2025 11:20:39 +0000 Subject: [PATCH 02/41] feat: move protos into request in TTableClient::TImpl::BulkUpsert Because `TValue&& rows` is provided as rvalue reference, we can move its value into the request rather than making a copy of the data. Speed boost: ~130MiB/s -> ~168-200MiB/s. --- .../sdk/cpp/include/ydb-cpp-sdk/client/value/value.h | 6 +++++- ydb/public/sdk/cpp/src/client/value/value.cpp | 8 ++++++++ 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/ydb/public/sdk/cpp/include/ydb-cpp-sdk/client/value/value.h b/ydb/public/sdk/cpp/include/ydb-cpp-sdk/client/value/value.h index a43f7f4aacba..ed4fed8dad74 100644 --- a/ydb/public/sdk/cpp/include/ydb-cpp-sdk/client/value/value.h +++ b/ydb/public/sdk/cpp/include/ydb-cpp-sdk/client/value/value.h @@ -29,6 +29,8 @@ class TType { const Ydb::Type& GetProto() const; Ydb::Type& GetProto(); + Ydb::Type ExtractProto() &&; + private: class TImpl; std::shared_ptr Impl_; @@ -278,11 +280,13 @@ class TValue { TValue(const TType& type, Ydb::Value&& valueProto); const TType& GetType() const; - TType & GetType(); + TType& GetType(); const Ydb::Value& GetProto() const; Ydb::Value& GetProto(); + Ydb::Value ExtractProto() &&; + private: class TImpl; std::shared_ptr Impl_; diff --git a/ydb/public/sdk/cpp/src/client/value/value.cpp b/ydb/public/sdk/cpp/src/client/value/value.cpp index 2516f3a42264..8eefd20b245d 100644 --- a/ydb/public/sdk/cpp/src/client/value/value.cpp +++ b/ydb/public/sdk/cpp/src/client/value/value.cpp @@ -111,6 +111,10 @@ Ydb::Type& TType::GetProto() return Impl_->ProtoType_; } +Ydb::Type TType::ExtractProto() && { + return std::move(Impl_->ProtoType_); +} + //////////////////////////////////////////////////////////////////////////////// class TTypeParser::TImpl { @@ -1080,6 +1084,10 @@ Ydb::Value& TValue::GetProto() { return Impl_->ProtoValue_; } +Ydb::Value TValue::ExtractProto() && { + return std::move(Impl_->ProtoValue_); +} + //////////////////////////////////////////////////////////////////////////////// class TValueParser::TImpl { From 6e99012eef3d6df070f58952a0454a2171d18223 Mon Sep 17 00:00:00 2001 From: Vladislav Artiukhov Date: Fri, 18 Apr 2025 14:00:46 +0000 Subject: [PATCH 03/41] feat: extract proto model out of FieldToValue in TCsvParser --- ydb/public/lib/ydb_cli/common/csv_parser.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/ydb/public/lib/ydb_cli/common/csv_parser.cpp b/ydb/public/lib/ydb_cli/common/csv_parser.cpp index 7815056fceb5..fe8bf50b6f3a 100644 --- a/ydb/public/lib/ydb_cli/common/csv_parser.cpp +++ b/ydb/public/lib/ydb_cli/common/csv_parser.cpp @@ -593,7 +593,8 @@ void TCsvParser::ProcessCsvLine( } TStringBuf nextField = Consume(splitter, meta, *headerIt); if (!*skipIt) { - *structItems->Add() = FieldToValue(*typeParserIt->get(), nextField, NullValue, meta, *headerIt).GetProto(); + TValue builtValue = FieldToValue(*typeParserIt->get(), nextField, NullValue, meta, *headerIt); + *structItems->Add() = std::move(builtValue).ExtractProto(); ++typeParserIt; } ++headerIt; From c301f468cb9eaa4fcc6de946b47250e25d65b2b4 Mon Sep 17 00:00:00 2001 From: Vladislav Artiukhov Date: Thu, 1 May 2025 13:38:35 +0000 Subject: [PATCH 04/41] feat: add && in return types of ExtractProto methods --- ydb/public/sdk/cpp/include/ydb-cpp-sdk/client/value/value.h | 4 ++-- ydb/public/sdk/cpp/src/client/value/value.cpp | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/ydb/public/sdk/cpp/include/ydb-cpp-sdk/client/value/value.h b/ydb/public/sdk/cpp/include/ydb-cpp-sdk/client/value/value.h index ed4fed8dad74..e83b2e3558ad 100644 --- a/ydb/public/sdk/cpp/include/ydb-cpp-sdk/client/value/value.h +++ b/ydb/public/sdk/cpp/include/ydb-cpp-sdk/client/value/value.h @@ -29,7 +29,7 @@ class TType { const Ydb::Type& GetProto() const; Ydb::Type& GetProto(); - Ydb::Type ExtractProto() &&; + Ydb::Type&& ExtractProto() &&; private: class TImpl; @@ -285,7 +285,7 @@ class TValue { const Ydb::Value& GetProto() const; Ydb::Value& GetProto(); - Ydb::Value ExtractProto() &&; + Ydb::Value&& ExtractProto() &&; private: class TImpl; diff --git a/ydb/public/sdk/cpp/src/client/value/value.cpp b/ydb/public/sdk/cpp/src/client/value/value.cpp index 8eefd20b245d..17f22a43d755 100644 --- a/ydb/public/sdk/cpp/src/client/value/value.cpp +++ b/ydb/public/sdk/cpp/src/client/value/value.cpp @@ -111,7 +111,7 @@ Ydb::Type& TType::GetProto() return Impl_->ProtoType_; } -Ydb::Type TType::ExtractProto() && { +Ydb::Type&& TType::ExtractProto() && { return std::move(Impl_->ProtoType_); } @@ -1084,7 +1084,7 @@ Ydb::Value& TValue::GetProto() { return Impl_->ProtoValue_; } -Ydb::Value TValue::ExtractProto() && { +Ydb::Value&& TValue::ExtractProto() && { return std::move(Impl_->ProtoValue_); } From 120b9947a87ab7f4e2f7efd37d3c476b887ef1e3 Mon Sep 17 00:00:00 2001 From: Vladislav Artiukhov Date: Thu, 1 May 2025 13:56:16 +0000 Subject: [PATCH 05/41] feat: move rows' proto into request proto in a separate method BulkUpsertUnretryable (leaving BulkUpsert unchanged) --- ydb/public/lib/ydb_cli/import/import.cpp | 2 +- .../include/ydb-cpp-sdk/client/table/table.h | 7 +++++ .../src/client/table/impl/table_client.cpp | 28 +++++++++++++++++++ .../cpp/src/client/table/impl/table_client.h | 5 ++++ ydb/public/sdk/cpp/src/client/table/table.cpp | 6 ++++ 5 files changed, 47 insertions(+), 1 deletion(-) diff --git a/ydb/public/lib/ydb_cli/import/import.cpp b/ydb/public/lib/ydb_cli/import/import.cpp index 4fb83f189eaf..0ae9c09e24ec 100644 --- a/ydb/public/lib/ydb_cli/import/import.cpp +++ b/ydb/public/lib/ydb_cli/import/import.cpp @@ -895,7 +895,7 @@ TAsyncStatus TImportFileClient::TImpl::UpsertTValueBuffer(const TString& dbPath, // to prevent copying data in retryFunc in a happy way when there is only one request TValue builtValue = prebuiltValue.has_value() ? std::move(prebuiltValue.value()) : buildFunc(); prebuiltValue = std::nullopt; - return tableClient.BulkUpsert(dbPath, std::move(builtValue), UpsertSettings) + return tableClient.BulkUpsertUnretryable(dbPath, std::move(builtValue), UpsertSettings) .Apply([](const NYdb::NTable::TAsyncBulkUpsertResult& bulkUpsertResult) { NYdb::TStatus status = bulkUpsertResult.GetValueSync(); return NThreading::MakeFuture(status); diff --git a/ydb/public/sdk/cpp/include/ydb-cpp-sdk/client/table/table.h b/ydb/public/sdk/cpp/include/ydb-cpp-sdk/client/table/table.h index 01bccbc00c03..8daa2850403b 100644 --- a/ydb/public/sdk/cpp/include/ydb-cpp-sdk/client/table/table.h +++ b/ydb/public/sdk/cpp/include/ydb-cpp-sdk/client/table/table.h @@ -1260,6 +1260,13 @@ class TTableClient { //! Using the client after call this method causes UB NThreading::TFuture Stop(); + /** + * @brief Upsert data into table moving `Ydb::Value` from `rows` into request proto model, + * therefore, it is not retryable with the same `rows` object. + */ + TAsyncBulkUpsertResult BulkUpsertUnretryable(const std::string& table, TValue&& rows, + const TBulkUpsertSettings& settings); + //! Non-transactional fast bulk write. //! Interanlly it uses an implicit session and thus doesn't need a session to be passed. //! "rows" parameter must be a list of structs where each stuct represents one row. diff --git a/ydb/public/sdk/cpp/src/client/table/impl/table_client.cpp b/ydb/public/sdk/cpp/src/client/table/impl/table_client.cpp index 163c7a477ee8..c56b6f5fb530 100644 --- a/ydb/public/sdk/cpp/src/client/table/impl/table_client.cpp +++ b/ydb/public/sdk/cpp/src/client/table/impl/table_client.cpp @@ -1020,6 +1020,34 @@ TAsyncBulkUpsertResult TTableClient::TImpl::BulkUpsert(const std::string& table, return promise.GetFuture(); } + +TAsyncBulkUpsertResult TTableClient::TImpl::BulkUpsert(const std::string& table, TValue&& rows, const TBulkUpsertSettings& settings) { + auto request = MakeOperationRequest(settings); + request.set_table(TStringType{table}); + *request.mutable_rows()->mutable_type() = TProtoAccessor::GetProto(rows.GetType()); + *request.mutable_rows()->mutable_value() = rows.GetProto(); + + auto promise = NewPromise(); + + auto extractor = [promise] + (google::protobuf::Any* any, TPlainStatus status) mutable { + Y_UNUSED(any); + TBulkUpsertResult val(TStatus(std::move(status))); + promise.SetValue(std::move(val)); + }; + + Connections_->RunDeferred( + std::move(request), + extractor, + &Ydb::Table::V1::TableService::Stub::AsyncBulkUpsert, + DbDriverState_, + INITIAL_DEFERRED_CALL_DELAY, + TRpcRequestSettings::Make(settings)); + + return promise.GetFuture(); +} + + TAsyncBulkUpsertResult TTableClient::TImpl::BulkUpsert(const std::string& table, EDataFormat format, const std::string& data, const std::string& schema, const TBulkUpsertSettings& settings) { diff --git a/ydb/public/sdk/cpp/src/client/table/impl/table_client.h b/ydb/public/sdk/cpp/src/client/table/impl/table_client.h index d1693310a83b..de702689de86 100644 --- a/ydb/public/sdk/cpp/src/client/table/impl/table_client.h +++ b/ydb/public/sdk/cpp/src/client/table/impl/table_client.h @@ -138,6 +138,11 @@ class TTableClient::TImpl: public TClientImplCommon, public void SetStatCollector(const NSdkStats::TStatCollector::TClientStatCollector& collector); + /** + * @brief Upsert data into table moving `Ydb::Value` from `rows` into request proto model, + * therefore, it is not retryable with the same `rows` object. + */ + TAsyncBulkUpsertResult BulkUpsertUnretryable(const std::string& table, TValue&& rows, const TBulkUpsertSettings& settings); TAsyncBulkUpsertResult BulkUpsert(const std::string& table, TValue&& rows, const TBulkUpsertSettings& settings, bool canMove); TAsyncBulkUpsertResult BulkUpsert(const std::string& table, EDataFormat format, const std::string& data, const std::string& schema, const TBulkUpsertSettings& settings); diff --git a/ydb/public/sdk/cpp/src/client/table/table.cpp b/ydb/public/sdk/cpp/src/client/table/table.cpp index fd64125bfedd..e8dfdace7783 100644 --- a/ydb/public/sdk/cpp/src/client/table/table.cpp +++ b/ydb/public/sdk/cpp/src/client/table/table.cpp @@ -1457,6 +1457,12 @@ NThreading::TFuture TTableClient::Stop() { return Impl_->Stop(); } +TAsyncBulkUpsertResult TTableClient::BulkUpsertUnretryable(const std::string& table, TValue&& rows, + const TBulkUpsertSettings& settings) +{ + return Impl_->BulkUpsertUnretryable(table, std::move(rows), settings); +} + TAsyncBulkUpsertResult TTableClient::BulkUpsert(const std::string& table, TValue&& rows, const TBulkUpsertSettings& settings) { From d5816afcba818b77cbc4ec37c1d2e631db32008a Mon Sep 17 00:00:00 2001 From: Vladislav Artiukhov Date: Thu, 1 May 2025 15:54:34 +0000 Subject: [PATCH 06/41] feat: move line into buffer in UpsertCsv --- ydb/public/lib/ydb_cli/import/import.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ydb/public/lib/ydb_cli/import/import.cpp b/ydb/public/lib/ydb_cli/import/import.cpp index 0ae9c09e24ec..9ea41d52f788 100644 --- a/ydb/public/lib/ydb_cli/import/import.cpp +++ b/ydb/public/lib/ydb_cli/import/import.cpp @@ -1041,7 +1041,7 @@ TStatus TImportFileClient::TImpl::UpsertCsv(IInputStream& input, line.erase(line.size() - Settings.Delimiter_.size()); } - buffer.push_back(line); + buffer.push_back(std::move(line)); if (readBytes >= nextReadBorder && Settings.Verbose_) { nextReadBorder += VerboseModeStepSize; From 88b80d511ab06027485b5ebeb14525395bb4a495 Mon Sep 17 00:00:00 2001 From: Vladislav Artiukhov Date: Fri, 18 Apr 2025 09:16:28 +0000 Subject: [PATCH 07/41] feat: introduce per-request arena (allocate Ydb::Value on arena, 1-to-1 allocation) --- ydb/public/lib/ydb_cli/import/import.cpp | 33 +++++++++++++++++++----- 1 file changed, 26 insertions(+), 7 deletions(-) diff --git a/ydb/public/lib/ydb_cli/import/import.cpp b/ydb/public/lib/ydb_cli/import/import.cpp index 9ea41d52f788..e2481ccdd92d 100644 --- a/ydb/public/lib/ydb_cli/import/import.cpp +++ b/ydb/public/lib/ydb_cli/import/import.cpp @@ -302,6 +302,11 @@ class TCsvFileReader { } } + // Get the protobuf arena for allocating messages + // google::protobuf::Arena* GetArena() { + // return &ProtoArena; + // } + TFileChunk& GetChunk(size_t threadId) { if (threadId >= Chunks.size()) { throw yexception() << "File chunk number is too big"; @@ -321,6 +326,7 @@ class TCsvFileReader { TVector Chunks; size_t SplitCount; size_t MaxInFlight; + // google::protobuf::Arena ProtoArena; }; class TJobInFlightManager { @@ -599,6 +605,10 @@ TImportFileClient::TImportFileClient(const TDriver& driver, const TClientCommand } TStatus TImportFileClient::Import(const TVector& filePaths, const TString& dbPath) { + // { + // google::protobuf::Arena arena; + // MyMessage* message = google::protobuf::Arena::Create(&arena); + // } return Impl_->Import(filePaths, dbPath); } @@ -989,10 +999,18 @@ TStatus TImportFileClient::TImpl::UpsertCsv(IInputStream& input, } }; - auto upsertCsvFunc = [&](std::vector&& buffer, ui64 row, std::shared_ptr batchStatus) { - auto buildFunc = [&, buffer = std::move(buffer), row, this] () mutable { + auto upsertCsvFunc = [&](std::vector&& buffer, ui64 batchBytes, ui64 row, std::shared_ptr batchStatus) { + // Create an arena per batch that will be shared in the closure + google::protobuf::ArenaOptions options{}; + options.start_block_size = batchBytes; + std::shared_ptr arena = std::make_shared(std::move(options)); + + auto buildFunc = [&, buffer = std::move(buffer), row, arena, this] () mutable { try { - return parser.BuildList(buffer, filePath, row); + // Use the arena for allocating protobuf objects + auto a = parser.BuildList(buffer, filePath, row, arena.get()); + // std::cerr << "row " << row << ": SpaceUsed=" << PrettifyBytes(arena->SpaceUsed()) << ", SpaceAllocated=" << PrettifyBytes(arena->SpaceAllocated()) << std::endl; + return a; } catch (const std::exception& e) { if (!Failed.exchange(true)) { ErrorStatus = MakeHolder(MakeStatus(EStatus::INTERNAL_ERROR, e.what())); @@ -1003,7 +1021,8 @@ TStatus TImportFileClient::TImpl::UpsertCsv(IInputStream& input, }; UpsertTValueBuffer(dbPath, std::move(buildFunc)) - .Apply([&, batchStatus](const TAsyncStatus& asyncStatus) { + // copy arena to the closure to ensure it's not destroyed before the request completes + .Apply([&, batchStatus, arena](const TAsyncStatus& asyncStatus) { jobInflightManager->ReleaseJob(); if (asyncStatus.GetValueSync().IsSuccess()) { batchStatus->Completed = true; @@ -1056,9 +1075,9 @@ TStatus TImportFileClient::TImpl::UpsertCsv(IInputStream& input, progressCallback(skippedBytes + readBytes, *inputSizeHint); } - auto workerFunc = [&upsertCsvFunc, row, buffer = std::move(buffer), + auto workerFunc = [&upsertCsvFunc, batchBytes, row, buffer = std::move(buffer), batchStatus = createStatus(row + batchRows)]() mutable { - upsertCsvFunc(std::move(buffer), row, batchStatus); + upsertCsvFunc(std::move(buffer), batchBytes, row, batchStatus); }; row += batchRows; batchRows = 0; @@ -1079,7 +1098,7 @@ TStatus TImportFileClient::TImpl::UpsertCsv(IInputStream& input, // Send the rest if buffer is not empty if (!buffer.empty() && countInput.Counter() > 0 && !Failed) { jobInflightManager->AcquireJob(); - upsertCsvFunc(std::move(buffer), row, createStatus(row + batchRows)); + upsertCsvFunc(std::move(buffer), batchBytes, row, createStatus(row + batchRows)); } jobInflightManager->WaitForAllJobs(); From e04dd2e248c8dfa99de06f119751270f3c4f79e9 Mon Sep 17 00:00:00 2001 From: Vladislav Artiukhov Date: Thu, 1 May 2025 16:06:21 +0000 Subject: [PATCH 08/41] Revert "feat: introduce per-request arena (allocate Ydb::Value on arena, 1-to-1 allocation)" This reverts commit b9bd7cd615768e4854521ff04211a02f67758900. --- ydb/public/lib/ydb_cli/import/import.cpp | 33 +++++------------------- 1 file changed, 7 insertions(+), 26 deletions(-) diff --git a/ydb/public/lib/ydb_cli/import/import.cpp b/ydb/public/lib/ydb_cli/import/import.cpp index e2481ccdd92d..9ea41d52f788 100644 --- a/ydb/public/lib/ydb_cli/import/import.cpp +++ b/ydb/public/lib/ydb_cli/import/import.cpp @@ -302,11 +302,6 @@ class TCsvFileReader { } } - // Get the protobuf arena for allocating messages - // google::protobuf::Arena* GetArena() { - // return &ProtoArena; - // } - TFileChunk& GetChunk(size_t threadId) { if (threadId >= Chunks.size()) { throw yexception() << "File chunk number is too big"; @@ -326,7 +321,6 @@ class TCsvFileReader { TVector Chunks; size_t SplitCount; size_t MaxInFlight; - // google::protobuf::Arena ProtoArena; }; class TJobInFlightManager { @@ -605,10 +599,6 @@ TImportFileClient::TImportFileClient(const TDriver& driver, const TClientCommand } TStatus TImportFileClient::Import(const TVector& filePaths, const TString& dbPath) { - // { - // google::protobuf::Arena arena; - // MyMessage* message = google::protobuf::Arena::Create(&arena); - // } return Impl_->Import(filePaths, dbPath); } @@ -999,18 +989,10 @@ TStatus TImportFileClient::TImpl::UpsertCsv(IInputStream& input, } }; - auto upsertCsvFunc = [&](std::vector&& buffer, ui64 batchBytes, ui64 row, std::shared_ptr batchStatus) { - // Create an arena per batch that will be shared in the closure - google::protobuf::ArenaOptions options{}; - options.start_block_size = batchBytes; - std::shared_ptr arena = std::make_shared(std::move(options)); - - auto buildFunc = [&, buffer = std::move(buffer), row, arena, this] () mutable { + auto upsertCsvFunc = [&](std::vector&& buffer, ui64 row, std::shared_ptr batchStatus) { + auto buildFunc = [&, buffer = std::move(buffer), row, this] () mutable { try { - // Use the arena for allocating protobuf objects - auto a = parser.BuildList(buffer, filePath, row, arena.get()); - // std::cerr << "row " << row << ": SpaceUsed=" << PrettifyBytes(arena->SpaceUsed()) << ", SpaceAllocated=" << PrettifyBytes(arena->SpaceAllocated()) << std::endl; - return a; + return parser.BuildList(buffer, filePath, row); } catch (const std::exception& e) { if (!Failed.exchange(true)) { ErrorStatus = MakeHolder(MakeStatus(EStatus::INTERNAL_ERROR, e.what())); @@ -1021,8 +1003,7 @@ TStatus TImportFileClient::TImpl::UpsertCsv(IInputStream& input, }; UpsertTValueBuffer(dbPath, std::move(buildFunc)) - // copy arena to the closure to ensure it's not destroyed before the request completes - .Apply([&, batchStatus, arena](const TAsyncStatus& asyncStatus) { + .Apply([&, batchStatus](const TAsyncStatus& asyncStatus) { jobInflightManager->ReleaseJob(); if (asyncStatus.GetValueSync().IsSuccess()) { batchStatus->Completed = true; @@ -1075,9 +1056,9 @@ TStatus TImportFileClient::TImpl::UpsertCsv(IInputStream& input, progressCallback(skippedBytes + readBytes, *inputSizeHint); } - auto workerFunc = [&upsertCsvFunc, batchBytes, row, buffer = std::move(buffer), + auto workerFunc = [&upsertCsvFunc, row, buffer = std::move(buffer), batchStatus = createStatus(row + batchRows)]() mutable { - upsertCsvFunc(std::move(buffer), batchBytes, row, batchStatus); + upsertCsvFunc(std::move(buffer), row, batchStatus); }; row += batchRows; batchRows = 0; @@ -1098,7 +1079,7 @@ TStatus TImportFileClient::TImpl::UpsertCsv(IInputStream& input, // Send the rest if buffer is not empty if (!buffer.empty() && countInput.Counter() > 0 && !Failed) { jobInflightManager->AcquireJob(); - upsertCsvFunc(std::move(buffer), batchBytes, row, createStatus(row + batchRows)); + upsertCsvFunc(std::move(buffer), row, createStatus(row + batchRows)); } jobInflightManager->WaitForAllJobs(); From 1d8d7965d1d2358a4198b91cdd47c991594e6a67 Mon Sep 17 00:00:00 2001 From: Vladislav Artiukhov Date: Thu, 1 May 2025 19:52:53 +0000 Subject: [PATCH 09/41] feat: implement single-arena-per-request strategy Implement additional APIs that work with arena-allocated proto messages. The implementation creates a new protobuf::Arena per batch request and builds Ydb::Value inside this arena inside TCsvParser::BuildListOnArena. Ydb::Value gets created once and then moved into Ydb::Table::BulkUpsertRequest, which is allocated on the same arena (allocation in the same arena prevents copying). Left undone/requires modifications: 1. TODO: I had to copy-paste RunDeferred/Run methods (see: RunDeferredOnArena/RunOnArena) because arena-allocated messages are returned as pointers, but the API expected rvalue/lvalue references on the request message. If the pointer is deferefenced, there will be a single copy of the message inside TGRpcConnectionsImpl::Run when WithServiceConnection is called and request is moved into capture parameter of a callback. In the best case, there must be no code duplication (the copy-pasted methods differ only in the type of accepted request: they accept TRequest*). 2. TODO: ensure correctness. Speed boost: ~130MiB/s -> ~250MiB/s --- ydb/public/lib/ydb_cli/common/csv_parser.cpp | 70 +++-- ydb/public/lib/ydb_cli/common/csv_parser.h | 12 +- ydb/public/lib/ydb_cli/import/import.cpp | 85 +++++- .../include/ydb-cpp-sdk/client/table/table.h | 7 + .../grpc_connections/grpc_connections.h | 246 +++++++++++++++++- .../impl/ydb_internal/make_request/make.h | 14 + .../src/client/table/impl/table_client.cpp | 43 +++ .../cpp/src/client/table/impl/table_client.h | 8 + ydb/public/sdk/cpp/src/client/table/table.cpp | 8 + 9 files changed, 458 insertions(+), 35 deletions(-) diff --git a/ydb/public/lib/ydb_cli/common/csv_parser.cpp b/ydb/public/lib/ydb_cli/common/csv_parser.cpp index fe8bf50b6f3a..5497b24ed73b 100644 --- a/ydb/public/lib/ydb_cli/common/csv_parser.cpp +++ b/ydb/public/lib/ydb_cli/common/csv_parser.cpp @@ -529,46 +529,64 @@ void TCsvParser::BuildValue(TString& data, TValueBuilder& builder, const TType& builder.EndStruct(); } -TValue TCsvParser::BuildList(const std::vector& lines, const TString& filename, std::optional row, google::protobuf::Arena* arena) const { +TValue TCsvParser::BuildList(const std::vector& lines, const TString& filename, std::optional row) const { std::vector> columnTypeParsers; columnTypeParsers.reserve(ResultColumnCount); for (const TType* type : ResultLineTypesSorted) { columnTypeParsers.push_back(std::make_unique(*type)); } - if (arena) { - // Arena allocation path - Ydb::Value* valuePtr = google::protobuf::Arena::CreateMessage(arena); - auto* listItems = valuePtr->mutable_items(); - listItems->Reserve(lines.size()); + // Original path with local value object + Ydb::Value valueProto; + auto* listItems = valueProto.mutable_items(); + listItems->Reserve(lines.size()); - for (const auto& line : lines) { - ProcessCsvLine(line, listItems, columnTypeParsers, row, filename); - if (row.has_value()) { - ++row.value(); - } + for (const auto& line : lines) { + ProcessCsvLine(line, listItems, columnTypeParsers, row, filename); + if (row.has_value()) { + ++row.value(); } + } - // Return a TValue that references the arena-allocated message - return TValue(ResultListType.value(), std::move(*valuePtr)); - } else { - // Original path with local value object - Ydb::Value valueProto; - auto* listItems = valueProto.mutable_items(); - listItems->Reserve(lines.size()); + // Return a TValue that takes ownership via move + return TValue(ResultListType.value(), std::move(valueProto)); +} - for (const auto& line : lines) { - ProcessCsvLine(line, listItems, columnTypeParsers, row, filename); - if (row.has_value()) { - ++row.value(); - } - } - // Return a TValue that takes ownership via move - return TValue(ResultListType.value(), std::move(valueProto)); + +// TODO: std::pair -> ArenaAllocatedTValue +std::pair TCsvParser::BuildListOnArena( + const std::vector& lines, + const TString& filename, + google::protobuf::Arena* arena, + std::optional row +) const { + assert(arena != nullptr); + + std::vector> columnTypeParsers; + columnTypeParsers.reserve(ResultColumnCount); + for (const TType* type : ResultLineTypesSorted) { + columnTypeParsers.push_back(std::make_unique(*type)); + } + + // allocate Ydb::Value on arena + Ydb::Value* value = google::protobuf::Arena::CreateMessage(arena); + auto* listItems = value->mutable_items(); + listItems->Reserve(lines.size()); + + for (const auto& line : lines) { + ProcessCsvLine(line, listItems, columnTypeParsers, row, filename); + if (row.has_value()) { + ++row.value(); + } } + + // Return a TValue that references the arena-allocated message + return std::make_pair(ResultListType.value(), value); } + + // Helper method to process a single CSV line void TCsvParser::ProcessCsvLine( const TString& line, diff --git a/ydb/public/lib/ydb_cli/common/csv_parser.h b/ydb/public/lib/ydb_cli/common/csv_parser.h index 0f2bda2cb06a..af0a8135b768 100644 --- a/ydb/public/lib/ydb_cli/common/csv_parser.h +++ b/ydb/public/lib/ydb_cli/common/csv_parser.h @@ -37,9 +37,17 @@ class TCsvParser { void BuildParams(TString& data, TParamsBuilder& builder, const TParseMetadata& meta) const; void BuildValue(TString& data, TValueBuilder& builder, const TType& type, const TParseMetadata& meta) const; + TValue BuildList(const std::vector& lines, const TString& filename, - std::optional row = std::nullopt, - google::protobuf::Arena* arena = nullptr) const; + std::optional row = std::nullopt) const; + + std::pair BuildListOnArena( + const std::vector& lines, + const TString& filename, + google::protobuf::Arena* arena, + std::optional row = std::nullopt + ) const; + void BuildLineType(); const TVector& GetHeader(); diff --git a/ydb/public/lib/ydb_cli/import/import.cpp b/ydb/public/lib/ydb_cli/import/import.cpp index 9ea41d52f788..80725f142bba 100644 --- a/ydb/public/lib/ydb_cli/import/import.cpp +++ b/ydb/public/lib/ydb_cli/import/import.cpp @@ -558,6 +558,10 @@ class TImportFileClient::TImpl { std::shared_ptr progressFile); TAsyncStatus UpsertTValueBuffer(const TString& dbPath, TValueBuilder& builder); TAsyncStatus UpsertTValueBuffer(const TString& dbPath, std::function&& buildFunc); + + TAsyncStatus UpsertTValueBufferOnArena( + const TString& dbPath, std::function(google::protobuf::Arena*)>&& buildFunc); + TStatus UpsertJson(IInputStream &input, const TString &dbPath, std::optional inputSizeHint, ProgressCallbackFunc & progressCallback); TStatus UpsertParquet(const TString& filename, const TString& dbPath, ProgressCallbackFunc & progressCallback); @@ -888,6 +892,7 @@ inline TAsyncStatus TImportFileClient::TImpl::UpsertTValueBuffer(const TString& dbPath, std::function&& buildFunc) { // For the first attempt values are built before acquiring request inflight semaphore std::optional prebuiltValue = buildFunc(); + auto retryFunc = [this, &dbPath, buildFunc = std::move(buildFunc), prebuiltValue = std::move(prebuiltValue)] (NYdb::NTable::TTableClient& tableClient) mutable -> TAsyncStatus { auto buildTValueAndSendRequest = [this, &buildFunc, &dbPath, &tableClient, &prebuiltValue]() { @@ -928,6 +933,67 @@ TAsyncStatus TImportFileClient::TImpl::UpsertTValueBuffer(const TString& dbPath, }); } + + + + +// TODO: std::pair -> ArenaAllocatedTValue +inline +TAsyncStatus TImportFileClient::TImpl::UpsertTValueBufferOnArena( + const TString& dbPath, std::function(google::protobuf::Arena*)>&& buildFunc) { + using YdbValuePair = std::pair; + auto arena = std::make_shared(); + + // For the first attempt values are built before acquiring request inflight semaphore + std::optional prebuiltValue = buildFunc(arena.get()); + + auto retryFunc = [this, &dbPath, buildFunc = std::move(buildFunc), + prebuiltValue = std::move(prebuiltValue), arena = std::move(arena)] + (NYdb::NTable::TTableClient& tableClient) mutable -> TAsyncStatus { + auto buildTValueAndSendRequest = [this, &buildFunc, &dbPath, &tableClient, &prebuiltValue, arena]() { + // For every retry attempt after first request build value from strings again + // to prevent copying data in retryFunc in a happy way when there is only one request + YdbValuePair builtValue = prebuiltValue.has_value() ? std::move(prebuiltValue.value()) : buildFunc(arena.get()); + prebuiltValue = std::nullopt; + return tableClient.BulkUpsertUnretryableArenaAllocated( + dbPath, std::move(builtValue), arena.get(), UpsertSettings) + .Apply([](const NYdb::NTable::TAsyncBulkUpsertResult& bulkUpsertResult) { + NYdb::TStatus status = bulkUpsertResult.GetValueSync(); + return NThreading::MakeFuture(status); + }); + }; + // Running heavy building task on processing pool: + return NThreading::Async(std::move(buildTValueAndSendRequest), *ProcessingPool); + }; + if (!RequestsInflight->try_acquire()) { + if (Settings.Verbose_ && Settings.NewlineDelimited_) { + if (!InformedAboutLimit.exchange(true)) { + Cerr << (TStringBuilder() << "@ (each '@' means max request inflight is reached and a worker thread is waiting for " + "any response from database)" << Endl); + } else { + Cerr << '@'; + } + } + RequestsInflight->acquire(); + } + return TableClient->RetryOperation(std::move(retryFunc), RetrySettings) + .Apply([this](const TAsyncStatus& asyncStatus) { + NYdb::TStatus status = asyncStatus.GetValueSync(); + if (!status.IsSuccess()) { + if (!Failed.exchange(true)) { + ErrorStatus = MakeHolder(status); + } + } + RequestsInflight->release(); + return asyncStatus; + }); +} + + + + + + TStatus TImportFileClient::TImpl::UpsertCsv(IInputStream& input, const TString& dbPath, const TString& filePath, @@ -990,9 +1056,21 @@ TStatus TImportFileClient::TImpl::UpsertCsv(IInputStream& input, }; auto upsertCsvFunc = [&](std::vector&& buffer, ui64 row, std::shared_ptr batchStatus) { - auto buildFunc = [&, buffer = std::move(buffer), row, this] () mutable { + // auto buildFunc = [&, buffer = std::move(buffer), row, this] () mutable { + // try { + // return parser.BuildList(buffer, filePath, row); + // } catch (const std::exception& e) { + // if (!Failed.exchange(true)) { + // ErrorStatus = MakeHolder(MakeStatus(EStatus::INTERNAL_ERROR, e.what())); + // } + // jobInflightManager->ReleaseJob(); + // throw; + // } + // }; + + auto buildOnArenaFunc = [&, buffer = std::move(buffer), row, this] (google::protobuf::Arena* arena) mutable { try { - return parser.BuildList(buffer, filePath, row); + return parser.BuildListOnArena(buffer, filePath, arena, row); } catch (const std::exception& e) { if (!Failed.exchange(true)) { ErrorStatus = MakeHolder(MakeStatus(EStatus::INTERNAL_ERROR, e.what())); @@ -1002,7 +1080,8 @@ TStatus TImportFileClient::TImpl::UpsertCsv(IInputStream& input, } }; - UpsertTValueBuffer(dbPath, std::move(buildFunc)) + // UpsertTValueBuffer(dbPath, std::move(buildFunc)) + UpsertTValueBufferOnArena(dbPath, std::move(buildOnArenaFunc)) .Apply([&, batchStatus](const TAsyncStatus& asyncStatus) { jobInflightManager->ReleaseJob(); if (asyncStatus.GetValueSync().IsSuccess()) { diff --git a/ydb/public/sdk/cpp/include/ydb-cpp-sdk/client/table/table.h b/ydb/public/sdk/cpp/include/ydb-cpp-sdk/client/table/table.h index 8daa2850403b..65e84b17aed1 100644 --- a/ydb/public/sdk/cpp/include/ydb-cpp-sdk/client/table/table.h +++ b/ydb/public/sdk/cpp/include/ydb-cpp-sdk/client/table/table.h @@ -1267,6 +1267,13 @@ class TTableClient { TAsyncBulkUpsertResult BulkUpsertUnretryable(const std::string& table, TValue&& rows, const TBulkUpsertSettings& settings); + TAsyncBulkUpsertResult BulkUpsertUnretryableArenaAllocated( + const std::string& table, + std::pair&& rows, + google::protobuf::Arena* arena, + const TBulkUpsertSettings& settings + ); + //! Non-transactional fast bulk write. //! Interanlly it uses an implicit session and thus doesn't need a session to be passed. //! "rows" parameter must be a list of structs where each stuct represents one row. diff --git a/ydb/public/sdk/cpp/src/client/impl/ydb_internal/grpc_connections/grpc_connections.h b/ydb/public/sdk/cpp/src/client/impl/ydb_internal/grpc_connections/grpc_connections.h index f1a23631c94e..4a457de8642a 100644 --- a/ydb/public/sdk/cpp/src/client/impl/ydb_internal/grpc_connections/grpc_connections.h +++ b/ydb/public/sdk/cpp/src/client/impl/ydb_internal/grpc_connections/grpc_connections.h @@ -174,7 +174,7 @@ class TGRpcConnectionsImpl } WithServiceConnection( - [this, request = std::move(request), userResponseCb = std::move(userResponseCb), rpc, requestSettings, context = std::move(context), dbState] + [this, request = std::forward(request), userResponseCb = std::move(userResponseCb), rpc, requestSettings, context = std::move(context), dbState] (TPlainStatus status, TConnection serviceConnection, TEndpointKey endpoint) mutable -> void { if (!status.Ok()) { userResponseCb( @@ -193,7 +193,7 @@ class TGRpcConnectionsImpl meta.Aux.push_back({ YDB_AUTH_TICKET_HEADER, GetAuthInfo(dbState) }); } catch (const std::exception& e) { userResponseCb( - nullptr, + nullptr, TPlainStatus( EStatus::CLIENT_UNAUTHENTICATED, TStringBuilder() << "Can't get Authentication info from CredentialsProvider. " << e.what() @@ -276,6 +276,150 @@ class TGRpcConnectionsImpl }, dbState, requestSettings.PreferredEndpoint, requestSettings.EndpointPolicy); } + + + + + template + void RunOnArena( + TRequest* request, + TResponseCb&& userResponseCb, + TSimpleRpc rpc, + TDbDriverStatePtr dbState, + const TRpcRequestSettings& requestSettings, + std::shared_ptr context = nullptr) + { + using NYdbGrpc::TGrpcStatus; + using TConnection = std::unique_ptr>; + Y_ABORT_UNLESS(dbState); + + if (!TryCreateContext(context)) { + TPlainStatus status(EStatus::CLIENT_CANCELLED, "Client is stopped"); + userResponseCb(nullptr, TPlainStatus{status.Status, std::move(status.Issues)}); + return; + } + + if (dbState->StatCollector.IsCollecting()) { + std::weak_ptr weakState = dbState; + const auto startTime = TInstant::Now(); + userResponseCb = std::move([cb = std::move(userResponseCb), weakState, startTime](TResponse* response, TPlainStatus status) { + const auto resultSize = response ? response->ByteSizeLong() : 0; + cb(response, status); + + if (auto state = weakState.lock()) { + state->StatCollector.IncRequestLatency(TInstant::Now() - startTime); + state->StatCollector.IncResultSize(resultSize); + } + }); + } + + WithServiceConnection( + [this, request, userResponseCb = std::move(userResponseCb), rpc, requestSettings, context = std::move(context), dbState] + (TPlainStatus status, TConnection serviceConnection, TEndpointKey endpoint) mutable -> void { + if (!status.Ok()) { + userResponseCb( + nullptr, + std::move(status)); + return; + } + + TCallMeta meta; + meta.Timeout = requestSettings.ClientTimeout; + #ifndef YDB_GRPC_UNSECURE_AUTH + meta.CallCredentials = dbState->CallCredentials; + #else + if (requestSettings.UseAuth && dbState->CredentialsProvider && dbState->CredentialsProvider->IsValid()) { + try { + meta.Aux.push_back({ YDB_AUTH_TICKET_HEADER, GetAuthInfo(dbState) }); + } catch (const std::exception& e) { + userResponseCb( + nullptr, + TPlainStatus( + EStatus::CLIENT_UNAUTHENTICATED, + TStringBuilder() << "Can't get Authentication info from CredentialsProvider. " << e.what() + ) + ); + return; + } + } + #endif + if (!requestSettings.TraceId.empty()) { + meta.Aux.push_back({YDB_TRACE_ID_HEADER, requestSettings.TraceId}); + } + + if (!requestSettings.RequestType.empty()) { + meta.Aux.push_back({YDB_REQUEST_TYPE_HEADER, requestSettings.RequestType}); + } + + if (!dbState->Database.empty()) { + SetDatabaseHeader(meta, dbState->Database); + } + + static const std::string clientPid = GetClientPIDHeaderValue(); + + meta.Aux.push_back({YDB_SDK_BUILD_INFO_HEADER, CreateSDKBuildInfo()}); + meta.Aux.push_back({YDB_CLIENT_PID, clientPid}); + meta.Aux.insert(meta.Aux.end(), requestSettings.Header.begin(), requestSettings.Header.end()); + + dbState->StatCollector.IncGRpcInFlight(); + dbState->StatCollector.IncGRpcInFlightByHost(endpoint.GetEndpoint()); + + NYdbGrpc::TAdvancedResponseCallback responseCbLow = + [this, context, userResponseCb = std::move(userResponseCb), endpoint, dbState] + (const grpc::ClientContext& ctx, TGrpcStatus&& grpcStatus, TResponse&& response) mutable -> void { + dbState->StatCollector.DecGRpcInFlight(); + dbState->StatCollector.DecGRpcInFlightByHost(endpoint.GetEndpoint()); + + if (NYdbGrpc::IsGRpcStatusGood(grpcStatus)) { + std::multimap metadata; + + for (const auto& [name, value] : ctx.GetServerInitialMetadata()) { + metadata.emplace( + std::string(name.begin(), name.end()), + std::string(value.begin(), value.end())); + } + for (const auto& [name, value] : ctx.GetServerTrailingMetadata()) { + metadata.emplace( + std::string(name.begin(), name.end()), + std::string(value.begin(), value.end())); + } + + auto resp = new TResult( + std::move(response), + std::move(grpcStatus), + std::move(userResponseCb), + this, + std::move(context), + endpoint.GetEndpoint(), + std::move(metadata)); + + EnqueueResponse(resp); + } else { + dbState->StatCollector.IncReqFailDueTransportError(); + dbState->StatCollector.IncTransportErrorsByHost(endpoint.GetEndpoint()); + + auto resp = new TGRpcErrorResponse( + std::move(grpcStatus), + std::move(userResponseCb), + this, + std::move(context), + endpoint.GetEndpoint()); + + dbState->EndpointPool.BanEndpoint(endpoint.GetEndpoint()); + + EnqueueResponse(resp); + } + }; + + serviceConnection->DoAdvancedRequest(*request, std::move(responseCbLow), rpc, meta, + context.get()); + }, dbState, requestSettings.PreferredEndpoint, requestSettings.EndpointPolicy); + } + + + + + template void RunDeferred( TRequest&& request, @@ -321,7 +465,62 @@ class TGRpcConnectionsImpl }; Run( - std::move(request), + std::forward(request), + responseCb, + rpc, + dbState, + requestSettings, + std::move(context)); + } + + + + template + void RunDeferredOnArena( + TRequest* request, + TDeferredOperationCb&& userResponseCb, + TSimpleRpc rpc, + TDbDriverStatePtr dbState, + TDuration deferredTimeout, + const TRpcRequestSettings& requestSettings, + bool poll = false, + std::shared_ptr context = nullptr) + { + if (!TryCreateContext(context)) { + TPlainStatus status(EStatus::CLIENT_CANCELLED, "Client is stopped"); + userResponseCb(nullptr, status); + return; + } + + auto responseCb = [this, userResponseCb = std::move(userResponseCb), dbState, deferredTimeout, poll, context] + (TResponse* response, TPlainStatus status) mutable + { + if (response) { + Ydb::Operations::Operation* operation = response->mutable_operation(); + if (!operation->ready() && poll) { + auto action = MakeIntrusive( + operation->id(), + std::move(userResponseCb), + this, + std::move(context), + deferredTimeout, + dbState, + status.Endpoint); + + action->Start(); + } else { + NYdb::NIssue::TIssues opIssues; + NYdb::NIssue::IssuesFromMessage(operation->issues(), opIssues); + userResponseCb(operation, TPlainStatus{static_cast(operation->status()), std::move(opIssues), + status.Endpoint, std::move(status.Metadata)}); + } + } else { + userResponseCb(nullptr, status); + } + }; + + RunOnArena( + request, responseCb, rpc, dbState, @@ -329,6 +528,9 @@ class TGRpcConnectionsImpl std::move(context)); } + + + // Run request using discovery endpoint. // Mostly usefull to make calls from credential provider template @@ -375,7 +577,7 @@ class TGRpcConnectionsImpl }; RunDeferred( - std::move(request), + std::forward(request), operationCb, rpc, dbState, @@ -385,6 +587,42 @@ class TGRpcConnectionsImpl context); } + + + +template + void RunDeferredOnArena( + TRequest* request, + TDeferredResultCb&& userResponseCb, + TSimpleRpc rpc, + TDbDriverStatePtr dbState, + TDuration deferredTimeout, + const TRpcRequestSettings& requestSettings, + std::shared_ptr context = nullptr) + { + auto operationCb = [userResponseCb = std::move(userResponseCb)](Ydb::Operations::Operation* operation, TPlainStatus status) mutable { + if (operation) { + status.SetCostInfo(std::move(*operation->mutable_cost_info())); + userResponseCb(operation->mutable_result(), std::move(status)); + } else { + userResponseCb(nullptr, std::move(status)); + } + }; + + RunDeferredOnArena( + request, + operationCb, + rpc, + dbState, + deferredTimeout, + requestSettings, + true, // poll + context); + } + + + + template class TStream> using TStreamRpc = typename TStream< diff --git a/ydb/public/sdk/cpp/src/client/impl/ydb_internal/make_request/make.h b/ydb/public/sdk/cpp/src/client/impl/ydb_internal/make_request/make.h index d8dd35dbe6b7..392fd35cf793 100644 --- a/ydb/public/sdk/cpp/src/client/impl/ydb_internal/make_request/make.h +++ b/ydb/public/sdk/cpp/src/client/impl/ydb_internal/make_request/make.h @@ -46,4 +46,18 @@ TProtoRequest MakeOperationRequest(const TRequestSettings& settings) { return request; } + +template +TProtoRequest* MakeRequestOnArena(google::protobuf::Arena* arena) { + assert(arena != nullptr); + return google::protobuf::Arena::CreateMessage(arena); +} + +template +TProtoRequest* MakeOperationRequestOnArena(const TRequestSettings& settings, google::protobuf::Arena* arena) { + auto request = MakeRequestOnArena(arena); + FillOperationParams(settings, *request); + return request; +} + } // namespace NYdb diff --git a/ydb/public/sdk/cpp/src/client/table/impl/table_client.cpp b/ydb/public/sdk/cpp/src/client/table/impl/table_client.cpp index c56b6f5fb530..27aac134b57f 100644 --- a/ydb/public/sdk/cpp/src/client/table/impl/table_client.cpp +++ b/ydb/public/sdk/cpp/src/client/table/impl/table_client.cpp @@ -1021,6 +1021,49 @@ TAsyncBulkUpsertResult TTableClient::TImpl::BulkUpsert(const std::string& table, } +// TODO: std::pair -> ArenaAllocatedTValue +TAsyncBulkUpsertResult TTableClient::TImpl::BulkUpsertUnretryableArenaAllocated( + const std::string& table, + std::pair&& rows, + google::protobuf::Arena* arena, + const TBulkUpsertSettings& settings +) { + auto request = MakeOperationRequestOnArena(settings, arena); + // std::cout << "request->GetArena(): " << request->GetArena() << "\n" + // << "request->mutable_rows()->GetArena(): " << request->mutable_rows()->GetArena() << "\n" + // << "Value->GetArena(): " << rows.second->GetArena() << "\n" + // << "given arena: " << arena << std::endl; + // assert(request->GetArena() == arena); + // assert(request->mutable_rows()->GetArena() == arena); + request->set_table(TStringType{table}); + *request->mutable_rows()->mutable_type() = std::move(rows.first).ExtractProto(); + *request->mutable_rows()->mutable_value() = std::move(*rows.second); + + auto promise = NewPromise(); + + auto extractor = [promise] + (google::protobuf::Any* any, TPlainStatus status) mutable { + Y_UNUSED(any); + TBulkUpsertResult val(TStatus(std::move(status))); + promise.SetValue(std::move(val)); + }; + + // TODO: make sure it is correct; see what server receives + // don't give ownership of request to the function + Connections_->RunDeferredOnArena( + request, + extractor, + &Ydb::Table::V1::TableService::Stub::AsyncBulkUpsert, + DbDriverState_, + INITIAL_DEFERRED_CALL_DELAY, + TRpcRequestSettings::Make(settings)); + + return promise.GetFuture(); +} + + + + TAsyncBulkUpsertResult TTableClient::TImpl::BulkUpsert(const std::string& table, TValue&& rows, const TBulkUpsertSettings& settings) { auto request = MakeOperationRequest(settings); request.set_table(TStringType{table}); diff --git a/ydb/public/sdk/cpp/src/client/table/impl/table_client.h b/ydb/public/sdk/cpp/src/client/table/impl/table_client.h index de702689de86..a06ad7ef9c24 100644 --- a/ydb/public/sdk/cpp/src/client/table/impl/table_client.h +++ b/ydb/public/sdk/cpp/src/client/table/impl/table_client.h @@ -143,6 +143,14 @@ class TTableClient::TImpl: public TClientImplCommon, public * therefore, it is not retryable with the same `rows` object. */ TAsyncBulkUpsertResult BulkUpsertUnretryable(const std::string& table, TValue&& rows, const TBulkUpsertSettings& settings); + + TAsyncBulkUpsertResult BulkUpsertUnretryableArenaAllocated( + const std::string& table, + std::pair&& rows, + google::protobuf::Arena* arena, + const TBulkUpsertSettings& settings + ); + TAsyncBulkUpsertResult BulkUpsert(const std::string& table, TValue&& rows, const TBulkUpsertSettings& settings, bool canMove); TAsyncBulkUpsertResult BulkUpsert(const std::string& table, EDataFormat format, const std::string& data, const std::string& schema, const TBulkUpsertSettings& settings); diff --git a/ydb/public/sdk/cpp/src/client/table/table.cpp b/ydb/public/sdk/cpp/src/client/table/table.cpp index e8dfdace7783..d840843970ac 100644 --- a/ydb/public/sdk/cpp/src/client/table/table.cpp +++ b/ydb/public/sdk/cpp/src/client/table/table.cpp @@ -1463,6 +1463,14 @@ TAsyncBulkUpsertResult TTableClient::BulkUpsertUnretryable(const std::string& ta return Impl_->BulkUpsertUnretryable(table, std::move(rows), settings); } +TAsyncBulkUpsertResult TTableClient::BulkUpsertUnretryableArenaAllocated(const std::string& table, + std::pair&& rows, + google::protobuf::Arena* arena, + const TBulkUpsertSettings& settings) +{ + return Impl_->BulkUpsertUnretryableArenaAllocated(table, std::move(rows), arena, settings); +} + TAsyncBulkUpsertResult TTableClient::BulkUpsert(const std::string& table, TValue&& rows, const TBulkUpsertSettings& settings) { From f958ca8074c02c971132b4d51f9e2f1994982ca4 Mon Sep 17 00:00:00 2001 From: Vladislav Artiukhov Date: Thu, 1 May 2025 20:19:23 +0000 Subject: [PATCH 10/41] fix: remmove nullptr param from BuildList call in tests --- ydb/public/lib/ydb_cli/common/csv_parser_ut.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ydb/public/lib/ydb_cli/common/csv_parser_ut.cpp b/ydb/public/lib/ydb_cli/common/csv_parser_ut.cpp index 6cfaa4db3603..7d2f1e1593a6 100644 --- a/ydb/public/lib/ydb_cli/common/csv_parser_ut.cpp +++ b/ydb/public/lib/ydb_cli/common/csv_parser_ut.cpp @@ -54,7 +54,7 @@ Y_UNIT_TEST_SUITE(YdbCliCsvParserTests) { TCsvParser parser(std::move(header), ',', "", &columnTypes, nullptr); parser.BuildLineType(); - TValue builtResult = parser.BuildList(data, "testFile.csv", 0, nullptr); + TValue builtResult = parser.BuildList(data, "testFile.csv", 0); AssertValuesEqual(builtResult, estimatedResult); } @@ -325,7 +325,7 @@ Y_UNIT_TEST_SUITE(YdbCliCsvParserTests) { TCsvParser parser(std::move(csvHeader), ',', "", &tableColumnTypes, nullptr); parser.BuildLineType(); - TValue builtResult = parser.BuildList(data, "testFile.csv", 0, nullptr); + TValue builtResult = parser.BuildList(data, "testFile.csv", 0); TValue expexctedResult = TValueBuilder().BeginList() .AddListItem().BeginStruct() From 6d1a5786b4f54c19818e6ea1e31c09b614b76a44 Mon Sep 17 00:00:00 2001 From: Vladislav Artiukhov Date: Thu, 1 May 2025 20:21:44 +0000 Subject: [PATCH 11/41] refactor: remove explicit default parameter from BuildList call --- ydb/public/lib/ydb_cli/import/import.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ydb/public/lib/ydb_cli/import/import.cpp b/ydb/public/lib/ydb_cli/import/import.cpp index 80725f142bba..8a7d49da341e 100644 --- a/ydb/public/lib/ydb_cli/import/import.cpp +++ b/ydb/public/lib/ydb_cli/import/import.cpp @@ -1226,7 +1226,7 @@ TStatus TImportFileClient::TImpl::UpsertCsvByBlocks(const TString& filePath, auto upsertCsvFunc = [&](std::vector&& buffer) { auto buildFunc = [&jobsInflight, &parser, buffer = std::move(buffer), &filePath, this]() mutable { try { - return parser.BuildList(buffer, filePath, std::nullopt); + return parser.BuildList(buffer, filePath); } catch (const std::exception& e) { if (!Failed.exchange(true)) { ErrorStatus = MakeHolder(MakeStatus(EStatus::INTERNAL_ERROR, e.what())); From 8b040f14ee8446521403d0684af27b681235ad0c Mon Sep 17 00:00:00 2001 From: Vladislav Artiukhov Date: Thu, 1 May 2025 20:44:45 +0000 Subject: [PATCH 12/41] feat: add several TODOs --- ydb/public/lib/ydb_cli/common/csv_parser.cpp | 2 ++ ydb/public/lib/ydb_cli/import/import.cpp | 1 + 2 files changed, 3 insertions(+) diff --git a/ydb/public/lib/ydb_cli/common/csv_parser.cpp b/ydb/public/lib/ydb_cli/common/csv_parser.cpp index 5497b24ed73b..4f92b94e2c25 100644 --- a/ydb/public/lib/ydb_cli/common/csv_parser.cpp +++ b/ydb/public/lib/ydb_cli/common/csv_parser.cpp @@ -611,6 +611,8 @@ void TCsvParser::ProcessCsvLine( } TStringBuf nextField = Consume(splitter, meta, *headerIt); if (!*skipIt) { + // TODO: create Ydb::Value on the arena to avoid copying here + // TODO: (despite std::move, we copy from non-arena allocated Ydb::Value into arena-allocated structItems) TValue builtValue = FieldToValue(*typeParserIt->get(), nextField, NullValue, meta, *headerIt); *structItems->Add() = std::move(builtValue).ExtractProto(); ++typeParserIt; diff --git a/ydb/public/lib/ydb_cli/import/import.cpp b/ydb/public/lib/ydb_cli/import/import.cpp index 8a7d49da341e..59ea617e9be0 100644 --- a/ydb/public/lib/ydb_cli/import/import.cpp +++ b/ydb/public/lib/ydb_cli/import/import.cpp @@ -1080,6 +1080,7 @@ TStatus TImportFileClient::TImpl::UpsertCsv(IInputStream& input, } }; + // TODO: create arena here and pass it to UpsertTValueBufferOnArena? // UpsertTValueBuffer(dbPath, std::move(buildFunc)) UpsertTValueBufferOnArena(dbPath, std::move(buildOnArenaFunc)) .Apply([&, batchStatus](const TAsyncStatus& asyncStatus) { From 42a9f52e5ba07ae809e39de68eba53c3f295dcf3 Mon Sep 17 00:00:00 2001 From: Vladislav Artiukhov Date: Fri, 2 May 2025 14:30:12 +0000 Subject: [PATCH 13/41] feat: name file-processing threads in their thread pool --- ydb/public/lib/ydb_cli/import/import.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/ydb/public/lib/ydb_cli/import/import.cpp b/ydb/public/lib/ydb_cli/import/import.cpp index 59ea617e9be0..8eee1f903402 100644 --- a/ydb/public/lib/ydb_cli/import/import.cpp +++ b/ydb/public/lib/ydb_cli/import/import.cpp @@ -679,7 +679,7 @@ TStatus TImportFileClient::TImpl::Import(const TVector& filePaths, cons auto start = TInstant::Now(); - TThreadPool jobPool; + TThreadPool jobPool(IThreadPool::TParams().SetThreadNamePrefix("FileWorker")); jobPool.Start(filePathsSize); TVector> asyncResults; @@ -699,6 +699,7 @@ TStatus TImportFileClient::TImpl::Import(const TVector& filePaths, cons for (size_t fileOrderNumber = 0; fileOrderNumber < filePathsSize; ++fileOrderNumber) { const auto& filePath = filePaths[fileOrderNumber]; std::shared_ptr progressFile = LoadOrStartImportProgress(filePath); + auto func = [&, fileOrderNumber, progressFile, this] { std::unique_ptr fileInput; std::optional fileSizeHint; From a8a0d36107e10ae797d4e566de9211c06c02094f Mon Sep 17 00:00:00 2001 From: Vladislav Artiukhov Date: Fri, 2 May 2025 14:30:38 +0000 Subject: [PATCH 14/41] feat: add TODO about arena-based TValueBuilderImpl --- ydb/public/sdk/cpp/src/client/value/value.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/ydb/public/sdk/cpp/src/client/value/value.cpp b/ydb/public/sdk/cpp/src/client/value/value.cpp index 17f22a43d755..2105ccc8a957 100644 --- a/ydb/public/sdk/cpp/src/client/value/value.cpp +++ b/ydb/public/sdk/cpp/src/client/value/value.cpp @@ -2024,6 +2024,7 @@ void TValueParser::CloseTagged() { //////////////////////////////////////////////////////////////////////////////// +// TODO: if TValueArenaBuilderImpl implemented, we can avoid copying Ydb::Value in TCsvParser::BuildListOnArena when FieldToValue is called class TValueBuilderImpl { using ETypeKind = TTypeParser::ETypeKind; using TMembersMap = std::map; From 17bcffffa5b861390772f60687f3963a7e0b123e Mon Sep 17 00:00:00 2001 From: Vladislav Artiukhov Date: Fri, 2 May 2025 15:34:29 +0000 Subject: [PATCH 15/41] refactor: replace pair of TType and Ydb::Value* with type TArenaAllocatedValue --- ydb/public/lib/ydb_cli/common/csv_parser.cpp | 7 ++-- ydb/public/lib/ydb_cli/common/csv_parser.h | 2 +- ydb/public/lib/ydb_cli/import/import.cpp | 10 ++--- .../include/ydb-cpp-sdk/client/table/table.h | 2 +- .../include/ydb-cpp-sdk/client/value/value.h | 18 ++++++++ .../src/client/table/impl/table_client.cpp | 8 ++-- .../cpp/src/client/table/impl/table_client.h | 2 +- ydb/public/sdk/cpp/src/client/table/table.cpp | 2 +- ydb/public/sdk/cpp/src/client/value/value.cpp | 42 +++++++++++++++++++ 9 files changed, 75 insertions(+), 18 deletions(-) diff --git a/ydb/public/lib/ydb_cli/common/csv_parser.cpp b/ydb/public/lib/ydb_cli/common/csv_parser.cpp index 4f92b94e2c25..92a2933873ef 100644 --- a/ydb/public/lib/ydb_cli/common/csv_parser.cpp +++ b/ydb/public/lib/ydb_cli/common/csv_parser.cpp @@ -554,14 +554,13 @@ TValue TCsvParser::BuildList(const std::vector& lines, const TString& f -// TODO: std::pair -> ArenaAllocatedTValue -std::pair TCsvParser::BuildListOnArena( +TArenaAllocatedValue TCsvParser::BuildListOnArena( const std::vector& lines, const TString& filename, google::protobuf::Arena* arena, std::optional row ) const { - assert(arena != nullptr); + Y_ASSERT(arena != nullptr); std::vector> columnTypeParsers; columnTypeParsers.reserve(ResultColumnCount); @@ -582,7 +581,7 @@ std::pair TCsvParser::BuildListOnArena( } // Return a TValue that references the arena-allocated message - return std::make_pair(ResultListType.value(), value); + return TArenaAllocatedValue(ResultListType.value(), value); } diff --git a/ydb/public/lib/ydb_cli/common/csv_parser.h b/ydb/public/lib/ydb_cli/common/csv_parser.h index af0a8135b768..85203852bd92 100644 --- a/ydb/public/lib/ydb_cli/common/csv_parser.h +++ b/ydb/public/lib/ydb_cli/common/csv_parser.h @@ -41,7 +41,7 @@ class TCsvParser { TValue BuildList(const std::vector& lines, const TString& filename, std::optional row = std::nullopt) const; - std::pair BuildListOnArena( + TArenaAllocatedValue BuildListOnArena( const std::vector& lines, const TString& filename, google::protobuf::Arena* arena, diff --git a/ydb/public/lib/ydb_cli/import/import.cpp b/ydb/public/lib/ydb_cli/import/import.cpp index 8eee1f903402..a9fd059a2479 100644 --- a/ydb/public/lib/ydb_cli/import/import.cpp +++ b/ydb/public/lib/ydb_cli/import/import.cpp @@ -560,7 +560,7 @@ class TImportFileClient::TImpl { TAsyncStatus UpsertTValueBuffer(const TString& dbPath, std::function&& buildFunc); TAsyncStatus UpsertTValueBufferOnArena( - const TString& dbPath, std::function(google::protobuf::Arena*)>&& buildFunc); + const TString& dbPath, std::function&& buildFunc); TStatus UpsertJson(IInputStream &input, const TString &dbPath, std::optional inputSizeHint, ProgressCallbackFunc & progressCallback); @@ -938,15 +938,13 @@ TAsyncStatus TImportFileClient::TImpl::UpsertTValueBuffer(const TString& dbPath, -// TODO: std::pair -> ArenaAllocatedTValue inline TAsyncStatus TImportFileClient::TImpl::UpsertTValueBufferOnArena( - const TString& dbPath, std::function(google::protobuf::Arena*)>&& buildFunc) { - using YdbValuePair = std::pair; + const TString& dbPath, std::function&& buildFunc) { auto arena = std::make_shared(); // For the first attempt values are built before acquiring request inflight semaphore - std::optional prebuiltValue = buildFunc(arena.get()); + std::optional prebuiltValue = buildFunc(arena.get()); auto retryFunc = [this, &dbPath, buildFunc = std::move(buildFunc), prebuiltValue = std::move(prebuiltValue), arena = std::move(arena)] @@ -954,7 +952,7 @@ TAsyncStatus TImportFileClient::TImpl::UpsertTValueBufferOnArena( auto buildTValueAndSendRequest = [this, &buildFunc, &dbPath, &tableClient, &prebuiltValue, arena]() { // For every retry attempt after first request build value from strings again // to prevent copying data in retryFunc in a happy way when there is only one request - YdbValuePair builtValue = prebuiltValue.has_value() ? std::move(prebuiltValue.value()) : buildFunc(arena.get()); + TArenaAllocatedValue builtValue = prebuiltValue.has_value() ? std::move(prebuiltValue.value()) : buildFunc(arena.get()); prebuiltValue = std::nullopt; return tableClient.BulkUpsertUnretryableArenaAllocated( dbPath, std::move(builtValue), arena.get(), UpsertSettings) diff --git a/ydb/public/sdk/cpp/include/ydb-cpp-sdk/client/table/table.h b/ydb/public/sdk/cpp/include/ydb-cpp-sdk/client/table/table.h index 65e84b17aed1..f2fdd5a64758 100644 --- a/ydb/public/sdk/cpp/include/ydb-cpp-sdk/client/table/table.h +++ b/ydb/public/sdk/cpp/include/ydb-cpp-sdk/client/table/table.h @@ -1269,7 +1269,7 @@ class TTableClient { TAsyncBulkUpsertResult BulkUpsertUnretryableArenaAllocated( const std::string& table, - std::pair&& rows, + TArenaAllocatedValue&& rows, google::protobuf::Arena* arena, const TBulkUpsertSettings& settings ); diff --git a/ydb/public/sdk/cpp/include/ydb-cpp-sdk/client/value/value.h b/ydb/public/sdk/cpp/include/ydb-cpp-sdk/client/value/value.h index e83b2e3558ad..425baa864569 100644 --- a/ydb/public/sdk/cpp/include/ydb-cpp-sdk/client/value/value.h +++ b/ydb/public/sdk/cpp/include/ydb-cpp-sdk/client/value/value.h @@ -292,6 +292,24 @@ class TValue { std::shared_ptr Impl_; }; +class TArenaAllocatedValue { +public: + TArenaAllocatedValue(const TType& type, Ydb::Value* value); + + const TType& GetType() const; + TType& GetType(); + + const Ydb::Value* GetProto() const; + Ydb::Value* GetProto(); + + Ydb::Value&& ExtractProto() &&; + +private: + class TImpl; + std::shared_ptr Impl_; +}; + + class TValueParser : public TMoveOnly { friend class TResultSetParser; public: diff --git a/ydb/public/sdk/cpp/src/client/table/impl/table_client.cpp b/ydb/public/sdk/cpp/src/client/table/impl/table_client.cpp index 27aac134b57f..79a448fe3289 100644 --- a/ydb/public/sdk/cpp/src/client/table/impl/table_client.cpp +++ b/ydb/public/sdk/cpp/src/client/table/impl/table_client.cpp @@ -1021,10 +1021,9 @@ TAsyncBulkUpsertResult TTableClient::TImpl::BulkUpsert(const std::string& table, } -// TODO: std::pair -> ArenaAllocatedTValue TAsyncBulkUpsertResult TTableClient::TImpl::BulkUpsertUnretryableArenaAllocated( const std::string& table, - std::pair&& rows, + TArenaAllocatedValue&& rows, google::protobuf::Arena* arena, const TBulkUpsertSettings& settings ) { @@ -1036,8 +1035,9 @@ TAsyncBulkUpsertResult TTableClient::TImpl::BulkUpsertUnretryableArenaAllocated( // assert(request->GetArena() == arena); // assert(request->mutable_rows()->GetArena() == arena); request->set_table(TStringType{table}); - *request->mutable_rows()->mutable_type() = std::move(rows.first).ExtractProto(); - *request->mutable_rows()->mutable_value() = std::move(*rows.second); + // TODO: Ydb::Type still gets copied because request is arena-allocated and rows' Type is not + *request->mutable_rows()->mutable_type() = std::move(rows.GetType()).ExtractProto(); + *request->mutable_rows()->mutable_value() = std::move(rows).ExtractProto(); auto promise = NewPromise(); diff --git a/ydb/public/sdk/cpp/src/client/table/impl/table_client.h b/ydb/public/sdk/cpp/src/client/table/impl/table_client.h index a06ad7ef9c24..8ff8d83a3281 100644 --- a/ydb/public/sdk/cpp/src/client/table/impl/table_client.h +++ b/ydb/public/sdk/cpp/src/client/table/impl/table_client.h @@ -146,7 +146,7 @@ class TTableClient::TImpl: public TClientImplCommon, public TAsyncBulkUpsertResult BulkUpsertUnretryableArenaAllocated( const std::string& table, - std::pair&& rows, + TArenaAllocatedValue&& rows, google::protobuf::Arena* arena, const TBulkUpsertSettings& settings ); diff --git a/ydb/public/sdk/cpp/src/client/table/table.cpp b/ydb/public/sdk/cpp/src/client/table/table.cpp index d840843970ac..44ae34a2aad9 100644 --- a/ydb/public/sdk/cpp/src/client/table/table.cpp +++ b/ydb/public/sdk/cpp/src/client/table/table.cpp @@ -1464,7 +1464,7 @@ TAsyncBulkUpsertResult TTableClient::BulkUpsertUnretryable(const std::string& ta } TAsyncBulkUpsertResult TTableClient::BulkUpsertUnretryableArenaAllocated(const std::string& table, - std::pair&& rows, + TArenaAllocatedValue&& rows, google::protobuf::Arena* arena, const TBulkUpsertSettings& settings) { diff --git a/ydb/public/sdk/cpp/src/client/value/value.cpp b/ydb/public/sdk/cpp/src/client/value/value.cpp index 2105ccc8a957..adcb001622d9 100644 --- a/ydb/public/sdk/cpp/src/client/value/value.cpp +++ b/ydb/public/sdk/cpp/src/client/value/value.cpp @@ -1060,6 +1060,24 @@ class TValue::TImpl { Ydb::Value ProtoValue_; }; +/** +* Lifetime of the arena, and hence the `Ydb::Value`, is expected to be managed by the caller. +*/ +class TArenaAllocatedValue::TImpl { +public: + TImpl(const TType& type, Ydb::Value* value) + : Type_(type) + , ArenaAllocatedProtoValue_(value) { + // assert that the value is indeed arena-allocated + Y_ASSERT(ArenaAllocatedProtoValue_ != nullptr); + Y_ASSERT(ArenaAllocatedProtoValue_->GetArena() != nullptr); + } + + // TODO: make Ydb::Type also arena-allocated? + TType Type_; + Ydb::Value* ArenaAllocatedProtoValue_; +}; + //////////////////////////////////////////////////////////////////////////////// TValue::TValue(const TType& type, const Ydb::Value& valueProto) @@ -1088,6 +1106,30 @@ Ydb::Value&& TValue::ExtractProto() && { return std::move(Impl_->ProtoValue_); } + +TArenaAllocatedValue::TArenaAllocatedValue(const TType& type, Ydb::Value* value) + : Impl_(new TImpl(type, value)) {} + +const TType& TArenaAllocatedValue::GetType() const { + return Impl_->Type_; +} + +TType& TArenaAllocatedValue::GetType() { + return Impl_->Type_; +} + +const Ydb::Value* TArenaAllocatedValue::GetProto() const { + return Impl_->ArenaAllocatedProtoValue_; +} + +Ydb::Value* TArenaAllocatedValue::GetProto() { + return Impl_->ArenaAllocatedProtoValue_; +} + +Ydb::Value&& TArenaAllocatedValue::ExtractProto() && { + return std::move(*Impl_->ArenaAllocatedProtoValue_); +} + //////////////////////////////////////////////////////////////////////////////// class TValueParser::TImpl { From 3f0daae4443fe7f4e253dea25a52314fe8a702b3 Mon Sep 17 00:00:00 2001 From: Vladislav Artiukhov Date: Thu, 8 May 2025 17:04:48 +0000 Subject: [PATCH 16/41] feat: build Ydb::Value on the arena in TCsvParser::BuildListOnArena Notes: 1. Creates custom converter in `csv_parser.cpp` to build `Ydb::Value` on the arena. 2. Creates new implementer of `TValueBuilderBase` to build `TArenaAllocatedValue` in `value.cpp`/`value.h`. 3. Introduces `TValueHolder` interface with two implementations: - `StackAllocatedValueHolder` with stack-allocated `Ydb::Value`. - `TArenaAllocatedValueHolder` with arena-allocated `Ydb::Value`. Current drawbacks: - `TValueHolder` has virtual calls, which is costy; try to employ static polymorphism. - `FieldToValueOnArena` creates `TCsvToYdbOnArenaConverter`, which is a copy of `TCsvToYdbConverter`. Verdict: didn't produce any performance improvements (likely due to virtual calls in `TValueHolder`). --- ydb/public/lib/ydb_cli/common/csv_parser.cpp | 464 +++++++++++++++++- ydb/public/lib/ydb_cli/common/csv_parser.h | 4 +- .../include/ydb-cpp-sdk/client/value/fwd.h | 1 + .../include/ydb-cpp-sdk/client/value/value.h | 10 + ydb/public/sdk/cpp/src/client/value/value.cpp | 97 +++- 5 files changed, 558 insertions(+), 18 deletions(-) diff --git a/ydb/public/lib/ydb_cli/common/csv_parser.cpp b/ydb/public/lib/ydb_cli/common/csv_parser.cpp index 92a2933873ef..badf392356b9 100644 --- a/ydb/public/lib/ydb_cli/common/csv_parser.cpp +++ b/ydb/public/lib/ydb_cli/common/csv_parser.cpp @@ -15,9 +15,7 @@ class TCsvToYdbConverter { public: explicit TCsvToYdbConverter(TTypeParser& parser, const std::optional& nullValue) : Parser(parser) - , NullValue(nullValue) - { - } + , NullValue(nullValue) {} template && std::is_signed_v, std::nullptr_t> = nullptr> static i64 StringToArithmetic(const TString& token, size_t& cnt) { @@ -397,6 +395,415 @@ class TCsvToYdbConverter { TValueBuilder Builder; }; + + +// TODO: merge with the impl above +class TCsvToYdbOnArenaConverter { +public: + explicit TCsvToYdbOnArenaConverter(TTypeParser& parser, const std::optional& nullValue, google::protobuf::Arena* arena) + : Parser(parser) + , NullValue(nullValue) + , Builder(arena) {} + + template && std::is_signed_v, std::nullptr_t> = nullptr> + static i64 StringToArithmetic(const TString& token, size_t& cnt) { + return std::stoll(token, &cnt); + } + + template && std::is_unsigned_v, std::nullptr_t> = nullptr> + static ui64 StringToArithmetic(const TString& token, size_t& cnt) { + return std::stoull(token, &cnt); + } + + template , std::nullptr_t> = nullptr> + static float StringToArithmetic(const TString& token, size_t& cnt) { + return std::stof(token, &cnt); + } + + template , std::nullptr_t> = nullptr> + static double StringToArithmetic(const TString& token, size_t& cnt) { + return std::stod(token, &cnt); + } + + template + T GetArithmetic(const TString& token) const { + size_t cnt; + try { + auto value = StringToArithmetic(token, cnt); + if (cnt != token.size() || value < std::numeric_limits::lowest() || value > std::numeric_limits::max()) { + throw yexception(); + } + return static_cast(value); + } catch (std::exception& e) { + throw TCsvParseException() << "Expected " << Parser.GetPrimitive() << " value, received: \"" << token << "\"."; + } + } + + void BuildPrimitive(const TString& token) { + switch (Parser.GetPrimitive()) { + case EPrimitiveType::Int8: + Builder.Int8(GetArithmetic(token)); + break; + case EPrimitiveType::Int16: + Builder.Int16(GetArithmetic(token)); + break; + case EPrimitiveType::Int32: + Builder.Int32(GetArithmetic(token)); + break; + case EPrimitiveType::Int64: + Builder.Int64(GetArithmetic(token)); + break; + case EPrimitiveType::Uint8: + Builder.Uint8(GetArithmetic(token)); + break; + case EPrimitiveType::Uint16: + Builder.Uint16(GetArithmetic(token)); + break; + case EPrimitiveType::Uint32: + Builder.Uint32(GetArithmetic(token)); + break; + case EPrimitiveType::Uint64: + Builder.Uint64(GetArithmetic(token)); + break; + case EPrimitiveType::Bool: + Builder.Bool(GetBool(token)); + break; + case EPrimitiveType::String: + Builder.String(token); + break; + case EPrimitiveType::Utf8: + Builder.Utf8(token); + break; + case EPrimitiveType::Json: + Builder.Json(token); + break; + case EPrimitiveType::JsonDocument: + Builder.JsonDocument(token); + break; + case EPrimitiveType::Yson: + Builder.Yson(token); + break; + case EPrimitiveType::Uuid: + Builder.Uuid(TUuidValue{token}); + break; + case EPrimitiveType::Float: + Builder.Float(GetArithmetic(token)); + break; + case EPrimitiveType::Double: + Builder.Double(GetArithmetic(token)); + break; + case EPrimitiveType::DyNumber: + Builder.DyNumber(token); + break; + case EPrimitiveType::Date: { + TInstant date; + if (!TInstant::TryParseIso8601(token, date)) { + date = TInstant::Days(GetArithmetic(token)); + } + Builder.Date(date); + break; + } + case EPrimitiveType::Datetime: { + TInstant datetime; + if (!TInstant::TryParseIso8601(token, datetime)) { + datetime = TInstant::Seconds(GetArithmetic(token)); + } + Builder.Datetime(datetime); + break; + } + case EPrimitiveType::Timestamp: { + TInstant timestamp; + if (!TInstant::TryParseIso8601(token, timestamp)) { + timestamp = TInstant::MicroSeconds(GetArithmetic(token)); + } + Builder.Timestamp(timestamp); + break; + } + case EPrimitiveType::Interval: + Builder.Interval(GetArithmetic(token)); + break; + case EPrimitiveType::Date32: { + TInstant date; + if (TInstant::TryParseIso8601(token, date)) { + Builder.Date32(date.Days()); + } else { + Builder.Date32(GetArithmetic(token)); + } + break; + } + case EPrimitiveType::Datetime64: { + TInstant date; + if (TInstant::TryParseIso8601(token, date)) { + Builder.Datetime64(date.Seconds()); + } else { + Builder.Datetime64(GetArithmetic(token)); + } + break; + } + case EPrimitiveType::Timestamp64: { + TInstant date; + if (TInstant::TryParseIso8601(token, date)) { + Builder.Timestamp64(date.MicroSeconds()); + } else { + Builder.Timestamp64(GetArithmetic(token)); + } + break; + } + case EPrimitiveType::Interval64: + Builder.Interval64(GetArithmetic(token)); + break; + case EPrimitiveType::TzDate: + Builder.TzDate(token); + break; + case EPrimitiveType::TzDatetime: + Builder.TzDatetime(token); + break; + case EPrimitiveType::TzTimestamp: + Builder.TzTimestamp(token); + break; + default: + throw TCsvParseException() << "Unsupported primitive type: " << Parser.GetPrimitive(); + } + } + + void BuildValue(const TStringBuf& token) { + switch (Parser.GetKind()) { + case TTypeParser::ETypeKind::Primitive: { + BuildPrimitive(std::string{token}); + break; + } + case TTypeParser::ETypeKind::Decimal: { + Builder.Decimal(TDecimalValue(std::string(token), Parser.GetDecimal().Precision, Parser.GetDecimal().Scale)); + break; + } + case TTypeParser::ETypeKind::Optional: { + Parser.OpenOptional(); + if (NullValue && token == NullValue) { + Builder.EmptyOptional(GetType()); + } else { + Builder.BeginOptional(); + BuildValue(token); + Builder.EndOptional(); + } + Parser.CloseOptional(); + break; + } + case TTypeParser::ETypeKind::Null: { + EnsureNull(token); + break; + } + case TTypeParser::ETypeKind::Void: { + EnsureNull(token); + break; + } + case TTypeParser::ETypeKind::Tagged: { + Parser.OpenTagged(); + Builder.BeginTagged(Parser.GetTag()); + BuildValue(token); + Builder.EndTagged(); + Parser.CloseTagged(); + break; + } + case TTypeParser::ETypeKind::Pg: { + if (NullValue && token == NullValue) { + Builder.Pg(TPgValue(TPgValue::VK_NULL, {}, Parser.GetPg())); + } else { + Builder.Pg(TPgValue(TPgValue::VK_TEXT, TString(token), Parser.GetPg())); + } + break; + } + default: + throw TCsvParseException() << "Unsupported type kind: " << Parser.GetKind(); + } + } + + void BuildType(TTypeBuilder& typeBuilder) { + switch (Parser.GetKind()) { + case TTypeParser::ETypeKind::Primitive: + typeBuilder.Primitive(Parser.GetPrimitive()); + break; + + case TTypeParser::ETypeKind::Decimal: + typeBuilder.Decimal(Parser.GetDecimal()); + break; + + case TTypeParser::ETypeKind::Optional: + Parser.OpenOptional(); + typeBuilder.BeginOptional(); + BuildType(typeBuilder); + typeBuilder.EndOptional(); + Parser.CloseOptional(); + break; + + case TTypeParser::ETypeKind::Tagged: + Parser.OpenTagged(); + typeBuilder.BeginTagged(Parser.GetTag()); + BuildType(typeBuilder); + typeBuilder.EndTagged(); + Parser.CloseTagged(); + break; + + case TTypeParser::ETypeKind::Pg: + typeBuilder.Pg(Parser.GetPg()); + break; + + default: + throw TCsvParseException() << "Unsupported type kind: " << Parser.GetKind(); + } + } + + TType GetType() { + TTypeBuilder typeBuilder; + BuildType(typeBuilder); + return typeBuilder.Build(); + } + + bool GetBool(const TString& token) const { + if (token == "true") { + return true; + } + if (token == "false") { + return false; + } + throw TCsvParseException() << "Expected bool value: \"true\" or \"false\", received: \"" << token << "\"."; + } + + void EnsureNull(const TStringBuf& token) const { + if (!NullValue) { + throw TCsvParseException() << "Expected null value instead of \"" << token << "\", but null value is not set."; + } + if (token != NullValue) { + throw TCsvParseException() << "Expected null value: \"" << NullValue << "\", received: \"" << token << "\"."; + } + } + + template + bool TryParseArithmetic(const TString& token) const { + size_t cnt; + try { + auto value = StringToArithmetic(token, cnt); + if (cnt != token.size() || value < std::numeric_limits::lowest() || value > std::numeric_limits::max()) { + return false; + } + } catch (std::exception& e) { + return false; + } + return true; + } + + bool TryParseBool(const TString& token) const { + TString tokenLowerCase = to_lower(token); + return tokenLowerCase == "true" || tokenLowerCase == "false"; + } + + bool TryParsePrimitive(const TString& token) { + switch (Parser.GetPrimitive()) { + case EPrimitiveType::Uint8: + return TryParseArithmetic(token) && !token.StartsWith('-'); + case EPrimitiveType::Uint16: + return TryParseArithmetic(token) && !token.StartsWith('-');; + case EPrimitiveType::Uint32: + return TryParseArithmetic(token) && !token.StartsWith('-');; + case EPrimitiveType::Uint64: + return TryParseArithmetic(token) && !token.StartsWith('-');; + case EPrimitiveType::Int8: + return TryParseArithmetic(token); + case EPrimitiveType::Int16: + return TryParseArithmetic(token); + case EPrimitiveType::Int32: + return TryParseArithmetic(token); + case EPrimitiveType::Int64: + return TryParseArithmetic(token); + case EPrimitiveType::Bool: + return TryParseBool(token); + case EPrimitiveType::Json: + return token.StartsWith('{') && token.EndsWith('}'); + break; + case EPrimitiveType::JsonDocument: + break; + case EPrimitiveType::Yson: + break; + case EPrimitiveType::Uuid: + static std::regex uuidRegexTemplate("[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}"); + return std::regex_match(token.c_str(), uuidRegexTemplate); + case EPrimitiveType::Float: + return TryParseArithmetic(token); + case EPrimitiveType::Double: + return TryParseArithmetic(token); + case EPrimitiveType::DyNumber: + break; + case EPrimitiveType::Date: { + TInstant date; + return TInstant::TryParseIso8601(token, date) && token.length() <= 10; + } + case EPrimitiveType::Datetime: { + TInstant datetime; + return TInstant::TryParseIso8601(token, datetime) && token.length() <= 19; + } + case EPrimitiveType::Timestamp: { + TInstant timestamp; + return TInstant::TryParseIso8601(token, timestamp) || TryParseArithmetic(token); + } + case EPrimitiveType::Interval: + break; + case EPrimitiveType::Date32: { + TInstant date; + return TInstant::TryParseIso8601(token, date) || TryParseArithmetic(token); + } + case EPrimitiveType::Datetime64: { + TInstant date; + return TInstant::TryParseIso8601(token, date) || TryParseArithmetic(token); + } + case EPrimitiveType::Timestamp64: { + TInstant date; + return TInstant::TryParseIso8601(token, date) || TryParseArithmetic(token); + } + case EPrimitiveType::Interval64: + return TryParseArithmetic(token); + case EPrimitiveType::TzDate: + break; + case EPrimitiveType::TzDatetime: + break; + case EPrimitiveType::TzTimestamp: + break; + default: + throw TCsvParseException() << "Unsupported primitive type: " << Parser.GetPrimitive(); + } + return false; + } + + bool TryParseValue(const TStringBuf& token, TPossibleType& possibleType) { + if (NullValue && token == NullValue) { + possibleType.SetHasNulls(true); + return true; + } + possibleType.SetHasNonNulls(true); + switch (Parser.GetKind()) { + case TTypeParser::ETypeKind::Primitive: { + return TryParsePrimitive(TString(token)); + } + case TTypeParser::ETypeKind::Decimal: { + break; + } + default: + throw TCsvParseException() << "Unsupported type kind: " << Parser.GetKind(); + } + return false; + } + + TArenaAllocatedValue Convert(const TStringBuf& token) { + BuildValue(token); + return Builder.BuildArenaAllocatedValue(); + } + +private: + TTypeParser& Parser; + const std::optional NullValue = ""; + TArenaAllocatedValueBuilder Builder; +}; + + + TCsvParseException FormatError(const std::exception& inputError, const TCsvParser::TParseMetadata& meta, const std::optional& columnName = {}) { @@ -427,6 +834,23 @@ TValue FieldToValue(TTypeParser& parser, } } +TArenaAllocatedValue FieldToValueOnArena( + TTypeParser& parser, + const TStringBuf& token, + const std::optional& nullValue, + const TCsvParser::TParseMetadata& meta, + const std::string& columnName, + google::protobuf::Arena* arena +) { + Y_ASSERT(arena != nullptr); + try { + TCsvToYdbOnArenaConverter converter(parser, nullValue, arena); + return converter.Convert(token); + } catch (std::exception& e) { + throw FormatError(e, meta, columnName); + } +} + TStringBuf Consume(NCsvFormat::CsvSplitter& splitter, const TCsvParser::TParseMetadata& meta, const TString& columnName) { @@ -542,7 +966,7 @@ TValue TCsvParser::BuildList(const std::vector& lines, const TString& f listItems->Reserve(lines.size()); for (const auto& line : lines) { - ProcessCsvLine(line, listItems, columnTypeParsers, row, filename); + ProcessCsvLine(line, listItems, columnTypeParsers, row, filename, /* arena = */ nullptr); if (row.has_value()) { ++row.value(); } @@ -574,7 +998,7 @@ TArenaAllocatedValue TCsvParser::BuildListOnArena( listItems->Reserve(lines.size()); for (const auto& line : lines) { - ProcessCsvLine(line, listItems, columnTypeParsers, row, filename); + ProcessCsvLine(line, listItems, columnTypeParsers, row, filename, arena); if (row.has_value()) { ++row.value(); } @@ -584,7 +1008,7 @@ TArenaAllocatedValue TCsvParser::BuildListOnArena( return TArenaAllocatedValue(ResultListType.value(), value); } - +std::atomic_bool print = true; // Helper method to process a single CSV line void TCsvParser::ProcessCsvLine( @@ -592,8 +1016,9 @@ void TCsvParser::ProcessCsvLine( google::protobuf::RepeatedPtrField* listItems, const std::vector>& columnTypeParsers, std::optional currentRow, - const TString& filename) const { - + const TString& filename, + google::protobuf::Arena* arena +) const { NCsvFormat::CsvSplitter splitter(line, Delimeter); auto* structItems = listItems->Add()->mutable_items(); structItems->Reserve(ResultColumnCount); @@ -612,8 +1037,27 @@ void TCsvParser::ProcessCsvLine( if (!*skipIt) { // TODO: create Ydb::Value on the arena to avoid copying here // TODO: (despite std::move, we copy from non-arena allocated Ydb::Value into arena-allocated structItems) - TValue builtValue = FieldToValue(*typeParserIt->get(), nextField, NullValue, meta, *headerIt); - *structItems->Add() = std::move(builtValue).ExtractProto(); + if (arena != nullptr) { + TArenaAllocatedValue builtValue = FieldToValueOnArena( + *typeParserIt->get(), nextField, NullValue, meta, *headerIt, arena); + + // if (print.load()) { + // Cerr << "arena: " << static_cast(arena) << Endl; + // Cerr << "builtValue.GetProto()->GetArena(): " << static_cast(builtValue.GetProto()->GetArena()) << Endl; + // Cerr << "structItems->GetArena(): " << static_cast(structItems->GetArena()) << Endl; + // Cerr << "structItems->Add()->GetArena(): " << static_cast(structItems->Add()->GetArena()) << Endl; + // print.store(false); + // } + // Y_ASSERT(arena != nullptr); + // Y_ASSERT(builtValue.GetProto()->GetArena() != nullptr); + // Y_ASSERT(builtValue.GetProto()->GetArena() == structItems->GetArena()); + + *structItems->Add() = std::move(builtValue).ExtractProto(); + } else { + TValue builtValue = FieldToValue(*typeParserIt->get(), nextField, NullValue, meta, *headerIt); + *structItems->Add() = std::move(builtValue).ExtractProto(); + } + ++typeParserIt; } ++headerIt; diff --git a/ydb/public/lib/ydb_cli/common/csv_parser.h b/ydb/public/lib/ydb_cli/common/csv_parser.h index 85203852bd92..fc2cc6107663 100644 --- a/ydb/public/lib/ydb_cli/common/csv_parser.h +++ b/ydb/public/lib/ydb_cli/common/csv_parser.h @@ -78,7 +78,9 @@ class TCsvParser { google::protobuf::RepeatedPtrField* listItems, const std::vector>& columnTypeParsers, std::optional currentRow, - const TString& filename) const; + const TString& filename, + google::protobuf::Arena* arena + ) const; }; } diff --git a/ydb/public/sdk/cpp/include/ydb-cpp-sdk/client/value/fwd.h b/ydb/public/sdk/cpp/include/ydb-cpp-sdk/client/value/fwd.h index 06eb71ac0703..9205080b7f7e 100644 --- a/ydb/public/sdk/cpp/include/ydb-cpp-sdk/client/value/fwd.h +++ b/ydb/public/sdk/cpp/include/ydb-cpp-sdk/client/value/fwd.h @@ -8,6 +8,7 @@ class TTypeBuilder; class TValue; class TValueParser; class TValueBuilder; +class TArenaAllocatedValueBuilder; template class TValueBuilderBase; diff --git a/ydb/public/sdk/cpp/include/ydb-cpp-sdk/client/value/value.h b/ydb/public/sdk/cpp/include/ydb-cpp-sdk/client/value/value.h index 425baa864569..f9ab78523436 100644 --- a/ydb/public/sdk/cpp/include/ydb-cpp-sdk/client/value/value.h +++ b/ydb/public/sdk/cpp/include/ydb-cpp-sdk/client/value/value.h @@ -2,6 +2,7 @@ #include "fwd.h" +#include #include #include @@ -546,6 +547,8 @@ class TValueBuilderBase : public TMoveOnly { TValueBuilderBase(); + TValueBuilderBase(google::protobuf::Arena* arena); + TValueBuilderBase(const TType& type); TValueBuilderBase(Ydb::Type& type, Ydb::Value& value); @@ -567,6 +570,13 @@ class TValueBuilder : public TValueBuilderBase { TValue Build(); }; +class TArenaAllocatedValueBuilder : public TValueBuilderBase { +public: + TArenaAllocatedValueBuilder(google::protobuf::Arena* arena); + + TArenaAllocatedValue BuildArenaAllocatedValue(); +}; + } // namespace NYdb template<> diff --git a/ydb/public/sdk/cpp/src/client/value/value.cpp b/ydb/public/sdk/cpp/src/client/value/value.cpp index adcb001622d9..59409f9d0649 100644 --- a/ydb/public/sdk/cpp/src/client/value/value.cpp +++ b/ydb/public/sdk/cpp/src/client/value/value.cpp @@ -2066,6 +2066,58 @@ void TValueParser::CloseTagged() { //////////////////////////////////////////////////////////////////////////////// +class TValueHolder { +public: + virtual Ydb::Value& Value() = 0; + virtual Ydb::Value* ValuePtr() = 0; + virtual ~TValueHolder() = default; +}; + +class StackAllocatedValueHolder : public TValueHolder { +public: + Ydb::Value& Value() override { + return ProtoValue_; + } + + Ydb::Value* ValuePtr() override { + return &ProtoValue_; + } + +private: + Ydb::Value ProtoValue_; +}; + +class ArenaAllocatedValueHolder : public TValueHolder { +public: + ArenaAllocatedValueHolder(google::protobuf::Arena* arena) + // value is created lazily on first access + : ArenaAllocatedValue_(nullptr) + , Arena_(arena) { + Y_ASSERT(arena != nullptr); + } + + Ydb::Value& Value() override { + createValueIfAbsent(); + return *ArenaAllocatedValue_; + } + + Ydb::Value* ValuePtr() override { + createValueIfAbsent(); + return ArenaAllocatedValue_; + } + +private: + void createValueIfAbsent() { + if (ArenaAllocatedValue_ == nullptr) { + ArenaAllocatedValue_ = google::protobuf::Arena::CreateMessage(Arena_); + } + } + +private: + Ydb::Value* ArenaAllocatedValue_; + google::protobuf::Arena* Arena_; +}; + // TODO: if TValueArenaBuilderImpl implemented, we can avoid copying Ydb::Value in TCsvParser::BuildListOnArena when FieldToValue is called class TValueBuilderImpl { using ETypeKind = TTypeParser::ETypeKind; @@ -2092,19 +2144,29 @@ class TValueBuilderImpl { public: TValueBuilderImpl() : TypeBuilder_() + , ValueHolder_(std::make_unique()) { - PushPath(ProtoValue_); + PushPath(ValueHolder_->Value()); + } + + TValueBuilderImpl(google::protobuf::Arena* arena) + : TypeBuilder_() + , ValueHolder_(std::make_unique(arena)) + { + PushPath(ValueHolder_->Value()); } TValueBuilderImpl(const TType& type) : TypeBuilder_() + , ValueHolder_(std::make_unique()) { - PushPath(ProtoValue_); + PushPath(ValueHolder_->Value()); GetType().CopyFrom(type.GetProto()); } TValueBuilderImpl(Ydb::Type& type, Ydb::Value& value) : TypeBuilder_(type) + , ValueHolder_(std::make_unique()) { PushPath(value); } @@ -2116,15 +2178,20 @@ class TValueBuilderImpl { } } - TValue BuildValue() { + TValue BuildStackAllocatedValue() { CheckValue(); Ydb::Value value; - value.Swap(&ProtoValue_); + value.Swap(&ValueHolder_->Value()); return TValue(TypeBuilder_.Build(), std::move(value)); } + TArenaAllocatedValue BuildArenaAllocatedValue() { + CheckValue(); + return TArenaAllocatedValue(TypeBuilder_.Build(), ValueHolder_->ValuePtr()); + } + void Bool(bool value) { FillPrimitiveType(EPrimitiveType::Bool); GetValue().set_bool_value(value); @@ -2832,10 +2899,10 @@ class TValueBuilderImpl { } private: - //TTypeBuilder TypeBuilder_; TTypeBuilder::TImpl TypeBuilder_; - Ydb::Value ProtoValue_; + // Ydb::Value ProtoValue_; + std::unique_ptr ValueHolder_; std::map StructsMap_; TStackVec Path_; @@ -2854,6 +2921,12 @@ template TValueBuilderBase::TValueBuilderBase() : Impl_(new TValueBuilderImpl()) {} + +template +TValueBuilderBase::TValueBuilderBase(google::protobuf::Arena* arena) + : Impl_(new TValueBuilderImpl(arena)) {} + + template TValueBuilderBase::TValueBuilderBase(const TType& type) : Impl_(new TValueBuilderImpl(type)) {} @@ -3411,6 +3484,7 @@ TDerived& TValueBuilderBase::EndTagged() { template class TValueBuilderBase; template class TValueBuilderBase; +template class TValueBuilderBase; //////////////////////////////////////////////////////////////////////////////// @@ -3421,7 +3495,16 @@ TValueBuilder::TValueBuilder(const TType& type) : TValueBuilderBase(type) {} TValue TValueBuilder::Build() { - return Impl_->BuildValue(); + return Impl_->BuildStackAllocatedValue(); +} + +//////////////////////////////////////////////////////////////////////////////// + +TArenaAllocatedValueBuilder::TArenaAllocatedValueBuilder(google::protobuf::Arena* arena) + : TValueBuilderBase(arena) {} + +TArenaAllocatedValue TArenaAllocatedValueBuilder::BuildArenaAllocatedValue() { + return Impl_->BuildArenaAllocatedValue(); } } // namespace NYdb From 4b6503a3044bd5ad996654024adf15ca8a0020fe Mon Sep 17 00:00:00 2001 From: Vladislav Artiukhov Date: Thu, 8 May 2025 19:20:22 +0000 Subject: [PATCH 17/41] feat: select `TValueHolder` in compile time when call `TCsvParser::BuildListOnArena` This commit fixes the issue with virtual calls in `TValueHolder`, integrated in the commit 396d98fb36f9f6ea3a15d08ee118d5a4452d3d60. Average speed: 254-257 MiB/s (33.1-33.6 sec spent) There seems to be no performance difference in terms of upload speed. The only difference is that the percental fraction of CPU load that `TCsvParser::BuildListOnArena` method takes: 1. this commit: 42.7% of the total CPU time. 2. commit 396d98fb36: 43.1% of the total CPU time. 3. without both commits: 49.3% of the total CPU time. --- ydb/public/sdk/cpp/client/ydb_value/fwd.h | 1 + .../ydb-cpp-sdk/client/params/params.h | 2 +- .../include/ydb-cpp-sdk/client/value/fwd.h | 2 +- .../include/ydb-cpp-sdk/client/value/value.h | 68 ++- .../sdk/cpp/src/client/params/params.cpp | 2 +- ydb/public/sdk/cpp/src/client/value/value.cpp | 498 ++++++++---------- 6 files changed, 280 insertions(+), 293 deletions(-) diff --git a/ydb/public/sdk/cpp/client/ydb_value/fwd.h b/ydb/public/sdk/cpp/client/ydb_value/fwd.h index f9f5df1abf5c..d1b26e0091fa 100644 --- a/ydb/public/sdk/cpp/client/ydb_value/fwd.h +++ b/ydb/public/sdk/cpp/client/ydb_value/fwd.h @@ -8,6 +8,7 @@ class TTypeBuilder; class TValue; class TValueParser; class TValueBuilder; +class TArenaAllocatedValueBuilder; template class TValueBuilderBase; diff --git a/ydb/public/sdk/cpp/include/ydb-cpp-sdk/client/params/params.h b/ydb/public/sdk/cpp/include/ydb-cpp-sdk/client/params/params.h index 71749a9c1adb..31a0d904a821 100644 --- a/ydb/public/sdk/cpp/include/ydb-cpp-sdk/client/params/params.h +++ b/ydb/public/sdk/cpp/include/ydb-cpp-sdk/client/params/params.h @@ -60,7 +60,7 @@ class TParams { std::shared_ptr Impl_; }; -class TParamValueBuilder : public TValueBuilderBase { +class TParamValueBuilder : public TValueBuilderBase { friend class TParamsBuilder; public: TParamsBuilder& Build(); diff --git a/ydb/public/sdk/cpp/include/ydb-cpp-sdk/client/value/fwd.h b/ydb/public/sdk/cpp/include/ydb-cpp-sdk/client/value/fwd.h index 9205080b7f7e..24a6913987b7 100644 --- a/ydb/public/sdk/cpp/include/ydb-cpp-sdk/client/value/fwd.h +++ b/ydb/public/sdk/cpp/include/ydb-cpp-sdk/client/value/fwd.h @@ -10,7 +10,7 @@ class TValueParser; class TValueBuilder; class TArenaAllocatedValueBuilder; -template +template class TValueBuilderBase; } // namespace NYdb diff --git a/ydb/public/sdk/cpp/include/ydb-cpp-sdk/client/value/value.h b/ydb/public/sdk/cpp/include/ydb-cpp-sdk/client/value/value.h index f9ab78523436..5883ee6f8d29 100644 --- a/ydb/public/sdk/cpp/include/ydb-cpp-sdk/client/value/value.h +++ b/ydb/public/sdk/cpp/include/ydb-cpp-sdk/client/value/value.h @@ -7,6 +7,7 @@ #include #include +#include namespace Ydb { class Type; @@ -173,6 +174,7 @@ std::string FormatType(const TType& type); //! To create complex type, corresponding scope should be opened by Begin*/End* calls //! To create complex repeated type, Add* should be called at least once class TTypeBuilder : public TMoveOnly { + template friend class TValueBuilderImpl; public: TTypeBuilder(TTypeBuilder&&); @@ -430,9 +432,59 @@ class TValueParser : public TMoveOnly { std::unique_ptr Impl_; }; +template class TValueBuilderImpl; -template + +class StackAllocatedValueHolder { +public: + Ydb::Value& Value() { + return ProtoValue_; + } + + Ydb::Value* ValuePtr() { + return &ProtoValue_; + } + +private: + Ydb::Value ProtoValue_; +}; + +class ArenaAllocatedValueHolder { +public: + ArenaAllocatedValueHolder(google::protobuf::Arena* arena) + // value is created lazily on first access + : ArenaAllocatedValue_(nullptr) + , Arena_(arena) { + Y_ASSERT(arena != nullptr); + } + + Ydb::Value& Value() { + createValueIfAbsent(); + return *ArenaAllocatedValue_; + } + + Ydb::Value* ValuePtr() { + createValueIfAbsent(); + return ArenaAllocatedValue_; + } + +private: + void createValueIfAbsent() { + if (ArenaAllocatedValue_ == nullptr) { + ArenaAllocatedValue_ = google::protobuf::Arena::CreateMessage(Arena_); + } + } + +private: + Ydb::Value* ArenaAllocatedValue_; + google::protobuf::Arena* Arena_; +}; + + + + +template class TValueBuilderBase : public TMoveOnly { friend TDerived; public: @@ -545,23 +597,21 @@ class TValueBuilderBase : public TMoveOnly { protected: TValueBuilderBase(TValueBuilderBase&&); - TValueBuilderBase(); - - TValueBuilderBase(google::protobuf::Arena* arena); + TValueBuilderBase(ValueHolder holder); - TValueBuilderBase(const TType& type); + TValueBuilderBase(ValueHolder holder, const TType& type); - TValueBuilderBase(Ydb::Type& type, Ydb::Value& value); + TValueBuilderBase(ValueHolder holder, Ydb::Type& type, Ydb::Value& value); ~TValueBuilderBase(); void CheckValue(); private: - std::unique_ptr Impl_; + std::unique_ptr> Impl_; }; -class TValueBuilder : public TValueBuilderBase { +class TValueBuilder : public TValueBuilderBase { public: TValueBuilder(); @@ -570,7 +620,7 @@ class TValueBuilder : public TValueBuilderBase { TValue Build(); }; -class TArenaAllocatedValueBuilder : public TValueBuilderBase { +class TArenaAllocatedValueBuilder : public TValueBuilderBase { public: TArenaAllocatedValueBuilder(google::protobuf::Arena* arena); diff --git a/ydb/public/sdk/cpp/src/client/params/params.cpp b/ydb/public/sdk/cpp/src/client/params/params.cpp index 25cb1a71c93c..87aec4f6e38a 100644 --- a/ydb/public/sdk/cpp/src/client/params/params.cpp +++ b/ydb/public/sdk/cpp/src/client/params/params.cpp @@ -130,7 +130,7 @@ class TParamsBuilder::TImpl { //////////////////////////////////////////////////////////////////////////////// TParamValueBuilder::TParamValueBuilder(TParamsBuilder& owner, Ydb::Type& typeProto, Ydb::Value& valueProto) - : TValueBuilderBase(typeProto, valueProto) + : TValueBuilderBase(StackAllocatedValueHolder(), typeProto, valueProto) , Owner_(owner) , Finished_(false) {} diff --git a/ydb/public/sdk/cpp/src/client/value/value.cpp b/ydb/public/sdk/cpp/src/client/value/value.cpp index 59409f9d0649..d3fa0ab97eb5 100644 --- a/ydb/public/sdk/cpp/src/client/value/value.cpp +++ b/ydb/public/sdk/cpp/src/client/value/value.cpp @@ -2066,59 +2066,8 @@ void TValueParser::CloseTagged() { //////////////////////////////////////////////////////////////////////////////// -class TValueHolder { -public: - virtual Ydb::Value& Value() = 0; - virtual Ydb::Value* ValuePtr() = 0; - virtual ~TValueHolder() = default; -}; - -class StackAllocatedValueHolder : public TValueHolder { -public: - Ydb::Value& Value() override { - return ProtoValue_; - } - - Ydb::Value* ValuePtr() override { - return &ProtoValue_; - } - -private: - Ydb::Value ProtoValue_; -}; - -class ArenaAllocatedValueHolder : public TValueHolder { -public: - ArenaAllocatedValueHolder(google::protobuf::Arena* arena) - // value is created lazily on first access - : ArenaAllocatedValue_(nullptr) - , Arena_(arena) { - Y_ASSERT(arena != nullptr); - } - - Ydb::Value& Value() override { - createValueIfAbsent(); - return *ArenaAllocatedValue_; - } - - Ydb::Value* ValuePtr() override { - createValueIfAbsent(); - return ArenaAllocatedValue_; - } - -private: - void createValueIfAbsent() { - if (ArenaAllocatedValue_ == nullptr) { - ArenaAllocatedValue_ = google::protobuf::Arena::CreateMessage(Arena_); - } - } - -private: - Ydb::Value* ArenaAllocatedValue_; - google::protobuf::Arena* Arena_; -}; - // TODO: if TValueArenaBuilderImpl implemented, we can avoid copying Ydb::Value in TCsvParser::BuildListOnArena when FieldToValue is called +template class TValueBuilderImpl { using ETypeKind = TTypeParser::ETypeKind; using TMembersMap = std::map; @@ -2142,31 +2091,24 @@ class TValueBuilderImpl { }; public: - TValueBuilderImpl() - : TypeBuilder_() - , ValueHolder_(std::make_unique()) - { - PushPath(ValueHolder_->Value()); - } - - TValueBuilderImpl(google::protobuf::Arena* arena) + TValueBuilderImpl(ValueHolder holder) : TypeBuilder_() - , ValueHolder_(std::make_unique(arena)) + , ValueHolder_(std::move(holder)) { - PushPath(ValueHolder_->Value()); + PushPath(ValueHolder_.Value()); } - TValueBuilderImpl(const TType& type) + TValueBuilderImpl(ValueHolder holder, const TType& type) : TypeBuilder_() - , ValueHolder_(std::make_unique()) + , ValueHolder_(std::move(holder)) { - PushPath(ValueHolder_->Value()); + PushPath(ValueHolder_.Value()); GetType().CopyFrom(type.GetProto()); } - TValueBuilderImpl(Ydb::Type& type, Ydb::Value& value) + TValueBuilderImpl(ValueHolder holder, Ydb::Type& type, Ydb::Value& value) : TypeBuilder_(type) - , ValueHolder_(std::make_unique()) + , ValueHolder_(std::move(holder)) { PushPath(value); } @@ -2182,14 +2124,14 @@ class TValueBuilderImpl { CheckValue(); Ydb::Value value; - value.Swap(&ValueHolder_->Value()); + value.Swap(&ValueHolder_.Value()); return TValue(TypeBuilder_.Build(), std::move(value)); } TArenaAllocatedValue BuildArenaAllocatedValue() { CheckValue(); - return TArenaAllocatedValue(TypeBuilder_.Build(), ValueHolder_->ValuePtr()); + return TArenaAllocatedValue(TypeBuilder_.Build(), ValueHolder_.ValuePtr()); } void Bool(bool value) { @@ -2902,7 +2844,7 @@ class TValueBuilderImpl { //TTypeBuilder TypeBuilder_; TTypeBuilder::TImpl TypeBuilder_; // Ydb::Value ProtoValue_; - std::unique_ptr ValueHolder_; + ValueHolder ValueHolder_; std::map StructsMap_; TStackVec Path_; @@ -2911,217 +2853,211 @@ class TValueBuilderImpl { //////////////////////////////////////////////////////////////////////////////// -template -TValueBuilderBase::TValueBuilderBase(TValueBuilderBase&&) = default; - -template -TValueBuilderBase::~TValueBuilderBase() = default; - -template -TValueBuilderBase::TValueBuilderBase() - : Impl_(new TValueBuilderImpl()) {} - +template +TValueBuilderBase::TValueBuilderBase(TValueBuilderBase&&) = default; -template -TValueBuilderBase::TValueBuilderBase(google::protobuf::Arena* arena) - : Impl_(new TValueBuilderImpl(arena)) {} +template +TValueBuilderBase::~TValueBuilderBase() = default; +template +TValueBuilderBase::TValueBuilderBase(ValueHolder holder) + : Impl_(new TValueBuilderImpl(std::move(holder))) {} -template -TValueBuilderBase::TValueBuilderBase(const TType& type) - : Impl_(new TValueBuilderImpl(type)) {} +template +TValueBuilderBase::TValueBuilderBase(ValueHolder holder, const TType& type) + : Impl_(new TValueBuilderImpl(std::move(holder), type)) {} -template -TValueBuilderBase::TValueBuilderBase(Ydb::Type& type, Ydb::Value& value) - : Impl_(new TValueBuilderImpl(type, value)) {} +template +TValueBuilderBase::TValueBuilderBase(ValueHolder holder, Ydb::Type& type, Ydb::Value& value) + : Impl_(new TValueBuilderImpl(std::move(holder), type, value)) {} -template -void TValueBuilderBase::CheckValue() { +template +void TValueBuilderBase::CheckValue() { return Impl_->CheckValue(); } -template -TDerived& TValueBuilderBase::Bool(bool value) { +template +TDerived& TValueBuilderBase::Bool(bool value) { Impl_->Bool(value); return static_cast(*this); } -template -TDerived& TValueBuilderBase::Int8(i8 value) { +template +TDerived& TValueBuilderBase::Int8(i8 value) { Impl_->Int8(value); return static_cast(*this); } -template -TDerived& TValueBuilderBase::Uint8(ui8 value) { +template +TDerived& TValueBuilderBase::Uint8(ui8 value) { Impl_->Uint8(value); return static_cast(*this); } -template -TDerived& TValueBuilderBase::Int16(i16 value) { +template +TDerived& TValueBuilderBase::Int16(i16 value) { Impl_->Int16(value); return static_cast(*this); } -template -TDerived& TValueBuilderBase::Uint16(ui16 value) { +template +TDerived& TValueBuilderBase::Uint16(ui16 value) { Impl_->Uint16(value); return static_cast(*this); } -template -TDerived& TValueBuilderBase::Int32(i32 value) { +template +TDerived& TValueBuilderBase::Int32(i32 value) { Impl_->Int32(value); return static_cast(*this); } -template -TDerived& TValueBuilderBase::Uint32(ui32 value) { +template +TDerived& TValueBuilderBase::Uint32(ui32 value) { Impl_->Uint32(value); return static_cast(*this); } -template -TDerived& TValueBuilderBase::Int64(int64_t value) { +template +TDerived& TValueBuilderBase::Int64(int64_t value) { Impl_->Int64(value); return static_cast(*this); } -template -TDerived& TValueBuilderBase::Uint64(uint64_t value) { +template +TDerived& TValueBuilderBase::Uint64(uint64_t value) { Impl_->Uint64(value); return static_cast(*this); } -template -TDerived& TValueBuilderBase::Float(float value) { +template +TDerived& TValueBuilderBase::Float(float value) { Impl_->Float(value); return static_cast(*this); } -template -TDerived& TValueBuilderBase::Double(double value) { +template +TDerived& TValueBuilderBase::Double(double value) { Impl_->Double(value); return static_cast(*this); } -template -TDerived& TValueBuilderBase::Date(const TInstant& value) { +template +TDerived& TValueBuilderBase::Date(const TInstant& value) { Impl_->Date(value); return static_cast(*this); } -template -TDerived& TValueBuilderBase::Datetime(const TInstant& value) { +template +TDerived& TValueBuilderBase::Datetime(const TInstant& value) { Impl_->Datetime(value); return static_cast(*this); } -template -TDerived& TValueBuilderBase::Timestamp(const TInstant& value) { +template +TDerived& TValueBuilderBase::Timestamp(const TInstant& value) { Impl_->Timestamp(value); return static_cast(*this); } -template -TDerived& TValueBuilderBase::Interval(int64_t value) { +template +TDerived& TValueBuilderBase::Interval(int64_t value) { Impl_->Interval(value); return static_cast(*this); } -template -TDerived& TValueBuilderBase::Date32(const i32 value) { +template +TDerived& TValueBuilderBase::Date32(const i32 value) { Impl_->Date32(value); return static_cast(*this); } -template -TDerived& TValueBuilderBase::Datetime64(const int64_t value) { +template +TDerived& TValueBuilderBase::Datetime64(const int64_t value) { Impl_->Datetime64(value); return static_cast(*this); } -template -TDerived& TValueBuilderBase::Timestamp64(const int64_t value) { +template +TDerived& TValueBuilderBase::Timestamp64(const int64_t value) { Impl_->Timestamp64(value); return static_cast(*this); } -template -TDerived& TValueBuilderBase::Interval64(const int64_t value) { +template +TDerived& TValueBuilderBase::Interval64(const int64_t value) { Impl_->Interval64(value); return static_cast(*this); } -template -TDerived& TValueBuilderBase::TzDate(const std::string& value) { +template +TDerived& TValueBuilderBase::TzDate(const std::string& value) { Impl_->TzDate(value); return static_cast(*this); } -template -TDerived& TValueBuilderBase::TzDatetime(const std::string& value) { +template +TDerived& TValueBuilderBase::TzDatetime(const std::string& value) { Impl_->TzDatetime(value); return static_cast(*this); } -template -TDerived& TValueBuilderBase::TzTimestamp(const std::string& value) { +template +TDerived& TValueBuilderBase::TzTimestamp(const std::string& value) { Impl_->TzTimestamp(value); return static_cast(*this); } -template -TDerived& TValueBuilderBase::String(const std::string& value) { +template +TDerived& TValueBuilderBase::String(const std::string& value) { Impl_->String(value); return static_cast(*this); } -template -TDerived& TValueBuilderBase::Utf8(const std::string& value) { +template +TDerived& TValueBuilderBase::Utf8(const std::string& value) { Impl_->Utf8(value); return static_cast(*this); } -template -TDerived& TValueBuilderBase::Yson(const std::string& value) { +template +TDerived& TValueBuilderBase::Yson(const std::string& value) { Impl_->Yson(value); return static_cast(*this); } -template -TDerived& TValueBuilderBase::Json(const std::string& value) { +template +TDerived& TValueBuilderBase::Json(const std::string& value) { Impl_->Json(value); return static_cast(*this); } -template -TDerived& TValueBuilderBase::Uuid(const TUuidValue& value) { +template +TDerived& TValueBuilderBase::Uuid(const TUuidValue& value) { Impl_->Uuid(value); return static_cast(*this); } -template -TDerived& TValueBuilderBase::JsonDocument(const std::string& value) { +template +TDerived& TValueBuilderBase::JsonDocument(const std::string& value) { Impl_->JsonDocument(value); return static_cast(*this); } -template -TDerived& TValueBuilderBase::DyNumber(const std::string& value) { +template +TDerived& TValueBuilderBase::DyNumber(const std::string& value) { Impl_->DyNumber(value); return static_cast(*this); } -template -TDerived& TValueBuilderBase::Decimal(const TDecimalValue& value) { +template +TDerived& TValueBuilderBase::Decimal(const TDecimalValue& value) { Impl_->Decimal(value); return static_cast(*this); } -template -TDerived& TValueBuilderBase::Pg(const TPgValue& value) { +template +TDerived& TValueBuilderBase::Pg(const TPgValue& value) { Impl_->Pg(value); return static_cast(*this); } @@ -3142,357 +3078,357 @@ TDerived& TValueBuilderBase::Pg(const TPgValue& value) { Impl_->EndOptional(); \ return static_cast(*this); -template -TDerived& TValueBuilderBase::OptionalBool(const std::optional& value) { +template +TDerived& TValueBuilderBase::OptionalBool(const std::optional& value) { SET_OPT_VALUE_MAYBE(Bool); } -template -TDerived& TValueBuilderBase::OptionalInt8(const std::optional& value) { +template +TDerived& TValueBuilderBase::OptionalInt8(const std::optional& value) { SET_OPT_VALUE_MAYBE(Int8); } -template -TDerived& TValueBuilderBase::OptionalUint8(const std::optional& value) { +template +TDerived& TValueBuilderBase::OptionalUint8(const std::optional& value) { SET_OPT_VALUE_MAYBE(Uint8); } -template -TDerived& TValueBuilderBase::OptionalInt16(const std::optional& value) { +template +TDerived& TValueBuilderBase::OptionalInt16(const std::optional& value) { SET_OPT_VALUE_MAYBE(Int16); } -template -TDerived& TValueBuilderBase::OptionalUint16(const std::optional& value) { +template +TDerived& TValueBuilderBase::OptionalUint16(const std::optional& value) { SET_OPT_VALUE_MAYBE(Uint16); } -template -TDerived& TValueBuilderBase::OptionalInt32(const std::optional& value) { +template +TDerived& TValueBuilderBase::OptionalInt32(const std::optional& value) { SET_OPT_VALUE_MAYBE(Int32); } -template -TDerived& TValueBuilderBase::OptionalUint32(const std::optional& value) { +template +TDerived& TValueBuilderBase::OptionalUint32(const std::optional& value) { SET_OPT_VALUE_MAYBE(Uint32); } -template -TDerived& TValueBuilderBase::OptionalInt64(const std::optional& value) { +template +TDerived& TValueBuilderBase::OptionalInt64(const std::optional& value) { SET_OPT_VALUE_MAYBE(Int64); } -template -TDerived& TValueBuilderBase::OptionalUint64(const std::optional& value) { +template +TDerived& TValueBuilderBase::OptionalUint64(const std::optional& value) { SET_OPT_VALUE_MAYBE(Uint64); } -template -TDerived& TValueBuilderBase::OptionalFloat(const std::optional& value) { +template +TDerived& TValueBuilderBase::OptionalFloat(const std::optional& value) { SET_OPT_VALUE_MAYBE(Float); } -template -TDerived& TValueBuilderBase::OptionalDouble(const std::optional& value) { +template +TDerived& TValueBuilderBase::OptionalDouble(const std::optional& value) { SET_OPT_VALUE_MAYBE(Double); } -template -TDerived& TValueBuilderBase::OptionalDate(const std::optional& value) { +template +TDerived& TValueBuilderBase::OptionalDate(const std::optional& value) { SET_OPT_VALUE_MAYBE(Date); } -template -TDerived& TValueBuilderBase::OptionalDatetime(const std::optional& value) { +template +TDerived& TValueBuilderBase::OptionalDatetime(const std::optional& value) { SET_OPT_VALUE_MAYBE(Datetime); } -template -TDerived& TValueBuilderBase::OptionalTimestamp(const std::optional& value) { +template +TDerived& TValueBuilderBase::OptionalTimestamp(const std::optional& value) { SET_OPT_VALUE_MAYBE(Timestamp); } -template -TDerived& TValueBuilderBase::OptionalInterval(const std::optional& value) { +template +TDerived& TValueBuilderBase::OptionalInterval(const std::optional& value) { SET_OPT_VALUE_MAYBE(Interval); } -template -TDerived& TValueBuilderBase::OptionalDate32(const std::optional& value) { +template +TDerived& TValueBuilderBase::OptionalDate32(const std::optional& value) { SET_OPT_VALUE_MAYBE(Date32); } -template -TDerived& TValueBuilderBase::OptionalDatetime64(const std::optional& value) { +template +TDerived& TValueBuilderBase::OptionalDatetime64(const std::optional& value) { SET_OPT_VALUE_MAYBE(Datetime64); } -template -TDerived& TValueBuilderBase::OptionalTimestamp64(const std::optional& value) { +template +TDerived& TValueBuilderBase::OptionalTimestamp64(const std::optional& value) { SET_OPT_VALUE_MAYBE(Timestamp64); } -template -TDerived& TValueBuilderBase::OptionalInterval64(const std::optional& value) { +template +TDerived& TValueBuilderBase::OptionalInterval64(const std::optional& value) { SET_OPT_VALUE_MAYBE(Interval64); } -template -TDerived& TValueBuilderBase::OptionalTzDate(const std::optional& value) { +template +TDerived& TValueBuilderBase::OptionalTzDate(const std::optional& value) { SET_OPT_VALUE_MAYBE(TzDate); } -template -TDerived& TValueBuilderBase::OptionalTzDatetime(const std::optional& value) { +template +TDerived& TValueBuilderBase::OptionalTzDatetime(const std::optional& value) { SET_OPT_VALUE_MAYBE(TzDatetime); } -template -TDerived& TValueBuilderBase::OptionalTzTimestamp(const std::optional& value) { +template +TDerived& TValueBuilderBase::OptionalTzTimestamp(const std::optional& value) { SET_OPT_VALUE_MAYBE(TzTimestamp); } -template -TDerived& TValueBuilderBase::OptionalString(const std::optional& value) { +template +TDerived& TValueBuilderBase::OptionalString(const std::optional& value) { SET_OPT_VALUE_MAYBE(String); } -template -TDerived& TValueBuilderBase::OptionalUtf8(const std::optional& value) { +template +TDerived& TValueBuilderBase::OptionalUtf8(const std::optional& value) { SET_OPT_VALUE_MAYBE(Utf8); } -template -TDerived& TValueBuilderBase::OptionalYson(const std::optional& value) { +template +TDerived& TValueBuilderBase::OptionalYson(const std::optional& value) { SET_OPT_VALUE_MAYBE(Yson); } -template -TDerived& TValueBuilderBase::OptionalJson(const std::optional& value) { +template +TDerived& TValueBuilderBase::OptionalJson(const std::optional& value) { SET_OPT_VALUE_MAYBE(Json); } -template -TDerived& TValueBuilderBase::OptionalUuid(const std::optional& value) { +template +TDerived& TValueBuilderBase::OptionalUuid(const std::optional& value) { SET_OPT_VALUE_MAYBE(Uuid); } -template -TDerived& TValueBuilderBase::OptionalJsonDocument(const std::optional& value) { +template +TDerived& TValueBuilderBase::OptionalJsonDocument(const std::optional& value) { SET_OPT_VALUE_MAYBE(JsonDocument); } -template -TDerived& TValueBuilderBase::OptionalDyNumber(const std::optional& value) { +template +TDerived& TValueBuilderBase::OptionalDyNumber(const std::optional& value) { SET_OPT_VALUE_MAYBE(DyNumber); } -template -TDerived& TValueBuilderBase::BeginOptional() { +template +TDerived& TValueBuilderBase::BeginOptional() { Impl_->BeginOptional(); return static_cast(*this); } -template -TDerived& TValueBuilderBase::EndOptional() { +template +TDerived& TValueBuilderBase::EndOptional() { Impl_->EndOptional(); return static_cast(*this); } -template -TDerived& TValueBuilderBase::EmptyOptional(const TType& itemType) { +template +TDerived& TValueBuilderBase::EmptyOptional(const TType& itemType) { Impl_->EmptyOptional(itemType); return static_cast(*this); } -template -TDerived& TValueBuilderBase::EmptyOptional(EPrimitiveType itemType) { +template +TDerived& TValueBuilderBase::EmptyOptional(EPrimitiveType itemType) { Impl_->EmptyOptional(itemType); return static_cast(*this); } -template -TDerived& TValueBuilderBase::EmptyOptional() { +template +TDerived& TValueBuilderBase::EmptyOptional() { Impl_->EmptyOptional(); return static_cast(*this); } -template -TDerived& TValueBuilderBase::BeginList() { +template +TDerived& TValueBuilderBase::BeginList() { Impl_->BeginList(); return static_cast(*this); } -template -TDerived& TValueBuilderBase::EndList() { +template +TDerived& TValueBuilderBase::EndList() { Impl_->EndList(); return static_cast(*this); } -template -TDerived& TValueBuilderBase::AddListItem() { +template +TDerived& TValueBuilderBase::AddListItem() { Impl_->AddListItem(); return static_cast(*this); } -template -TDerived& TValueBuilderBase::AddListItem(const TValue& itemValue) { +template +TDerived& TValueBuilderBase::AddListItem(const TValue& itemValue) { Impl_->AddListItem(itemValue); return static_cast(*this); } -template -TDerived& TValueBuilderBase::AddListItem(TValue&& itemValue) { +template +TDerived& TValueBuilderBase::AddListItem(TValue&& itemValue) { Impl_->AddListItem(std::move(itemValue)); return static_cast(*this); } -template -TDerived& TValueBuilderBase::EmptyList(const TType& itemType) { +template +TDerived& TValueBuilderBase::EmptyList(const TType& itemType) { Impl_->EmptyList(itemType); return static_cast(*this); } -template -TDerived& TValueBuilderBase::EmptyList() { +template +TDerived& TValueBuilderBase::EmptyList() { Impl_->EmptyList(); return static_cast(*this); } -template -TDerived& TValueBuilderBase::BeginStruct() { +template +TDerived& TValueBuilderBase::BeginStruct() { Impl_->BeginStruct(); return static_cast(*this); } -template -TDerived& TValueBuilderBase::AddMember(const std::string& memberName) { +template +TDerived& TValueBuilderBase::AddMember(const std::string& memberName) { Impl_->AddMember(memberName); return static_cast(*this); } -template -TDerived& TValueBuilderBase::AddMember(const std::string& memberName, const TValue& memberValue) { +template +TDerived& TValueBuilderBase::AddMember(const std::string& memberName, const TValue& memberValue) { Impl_->AddMember(memberName, memberValue); return static_cast(*this); } -template -TDerived& TValueBuilderBase::AddMember(const std::string& memberName, TValue&& memberValue) { +template +TDerived& TValueBuilderBase::AddMember(const std::string& memberName, TValue&& memberValue) { Impl_->AddMember(memberName, std::move(memberValue)); return static_cast(*this); } -template -TDerived& TValueBuilderBase::EndStruct() { +template +TDerived& TValueBuilderBase::EndStruct() { Impl_->EndStruct(); return static_cast(*this); } -template -TDerived& TValueBuilderBase::BeginTuple() { +template +TDerived& TValueBuilderBase::BeginTuple() { Impl_->BeginTuple(); return static_cast(*this); } -template -TDerived& TValueBuilderBase::AddElement() { +template +TDerived& TValueBuilderBase::AddElement() { Impl_->AddElement(); return static_cast(*this); } -template -TDerived& TValueBuilderBase::AddElement(const TValue& elementValue) { +template +TDerived& TValueBuilderBase::AddElement(const TValue& elementValue) { Impl_->AddElement(elementValue); return static_cast(*this); } -template -TDerived& TValueBuilderBase::EndTuple() { +template +TDerived& TValueBuilderBase::EndTuple() { Impl_->EndTuple(); return static_cast(*this); } -template -TDerived& TValueBuilderBase::BeginDict() { +template +TDerived& TValueBuilderBase::BeginDict() { Impl_->BeginDict(); return static_cast(*this); } -template -TDerived& TValueBuilderBase::AddDictItem() { +template +TDerived& TValueBuilderBase::AddDictItem() { Impl_->AddDictItem(); return static_cast(*this); } -template -TDerived& TValueBuilderBase::DictKey() { +template +TDerived& TValueBuilderBase::DictKey() { Impl_->DictKey(); return static_cast(*this); } -template -TDerived& TValueBuilderBase::DictKey(const TValue& keyValue) { +template +TDerived& TValueBuilderBase::DictKey(const TValue& keyValue) { Impl_->DictKey(keyValue); return static_cast(*this); } -template -TDerived& TValueBuilderBase::DictPayload() { +template +TDerived& TValueBuilderBase::DictPayload() { Impl_->DictPayload(); return static_cast(*this); } -template -TDerived& TValueBuilderBase::DictPayload(const TValue& payloadValue) { +template +TDerived& TValueBuilderBase::DictPayload(const TValue& payloadValue) { Impl_->DictPayload(payloadValue); return static_cast(*this); } -template -TDerived& TValueBuilderBase::EndDict() { +template +TDerived& TValueBuilderBase::EndDict() { Impl_->EndDict(); return static_cast(*this); } -template -TDerived& TValueBuilderBase::EmptyDict(const TType& keyType, const TType& payloadType) { +template +TDerived& TValueBuilderBase::EmptyDict(const TType& keyType, const TType& payloadType) { Impl_->EmptyDict(keyType, payloadType); return static_cast(*this); } -template -TDerived& TValueBuilderBase::EmptyDict() { +template +TDerived& TValueBuilderBase::EmptyDict() { Impl_->EmptyDict(); return static_cast(*this); } -template -TDerived& TValueBuilderBase::BeginTagged(const std::string& tag) { +template +TDerived& TValueBuilderBase::BeginTagged(const std::string& tag) { Impl_->BeginTagged(tag); return static_cast(*this); } -template -TDerived& TValueBuilderBase::EndTagged() { +template +TDerived& TValueBuilderBase::EndTagged() { Impl_->EndTagged(); return static_cast(*this); } //////////////////////////////////////////////////////////////////////////////// -template class TValueBuilderBase; -template class TValueBuilderBase; -template class TValueBuilderBase; +template class TValueBuilderBase; +template class TValueBuilderBase; +template class TValueBuilderBase; //////////////////////////////////////////////////////////////////////////////// TValueBuilder::TValueBuilder() - : TValueBuilderBase() {} + : TValueBuilderBase(StackAllocatedValueHolder()) {} TValueBuilder::TValueBuilder(const TType& type) - : TValueBuilderBase(type) {} + : TValueBuilderBase(StackAllocatedValueHolder(), type) {} TValue TValueBuilder::Build() { return Impl_->BuildStackAllocatedValue(); @@ -3501,7 +3437,7 @@ TValue TValueBuilder::Build() { //////////////////////////////////////////////////////////////////////////////// TArenaAllocatedValueBuilder::TArenaAllocatedValueBuilder(google::protobuf::Arena* arena) - : TValueBuilderBase(arena) {} + : TValueBuilderBase(ArenaAllocatedValueHolder(arena)) {} TArenaAllocatedValue TArenaAllocatedValueBuilder::BuildArenaAllocatedValue() { return Impl_->BuildArenaAllocatedValue(); From 577711e417b9717f618087c53fe9de920068089c Mon Sep 17 00:00:00 2001 From: Vladislav Artiukhov Date: Fri, 23 May 2025 20:52:22 +0000 Subject: [PATCH 18/41] feat: output upload metrics for extraction in evaluation pipeline --- ydb/public/lib/ydb_cli/import/import.cpp | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/ydb/public/lib/ydb_cli/import/import.cpp b/ydb/public/lib/ydb_cli/import/import.cpp index a9fd059a2479..fdfca16a65c0 100644 --- a/ydb/public/lib/ydb_cli/import/import.cpp +++ b/ydb/public/lib/ydb_cli/import/import.cpp @@ -845,6 +845,15 @@ TStatus TImportFileClient::TImpl::Import(const TVector& filePaths, cons std::cerr << "Elapsed: " << std::setprecision(3) << duration.SecondsFloat() << " sec. Total read size: " << PrettifyBytes(TotalBytesRead) << ". Average processing speed: " << PrettifyBytes((double)TotalBytesRead / duration.SecondsFloat()) << "/s." << std::endl; + + // print strcture for the pipeline: + // elapsed_time_sec: [float] + // total_read_size_byte: [integer] + // avg_processing_speed_bytes_per_sec: [integer] + std::cout << "elapsed_time_sec: " << std::setprecision(5) << duration.SecondsFloat() << std::endl; + std::cout << "total_read_size_byte: " << TotalBytesRead << std::endl; + std::cout << "avg_processing_speed_MiB_per_sec: " << std::setprecision(5) + << ((double)TotalBytesRead / duration.SecondsFloat()) / (1024 * 1024) << std::endl; } // Removing all progress files that were a part of this import From 6c34382ef3916d15c24b2cd4d6ddf3de3a145e21 Mon Sep 17 00:00:00 2001 From: Vladislav Artiukhov Date: Sun, 8 Jun 2025 12:25:11 +0000 Subject: [PATCH 19/41] refactor: remove modifications in SDK related to TValueBuilder --- ydb/public/lib/ydb_cli/common/csv_parser.cpp | 456 +----------------- ydb/public/lib/ydb_cli/common/csv_parser.h | 3 +- ydb/public/sdk/cpp/client/ydb_value/fwd.h | 1 - .../ydb-cpp-sdk/client/params/params.h | 2 +- .../include/ydb-cpp-sdk/client/value/fwd.h | 3 +- .../include/ydb-cpp-sdk/client/value/value.h | 72 +-- .../sdk/cpp/src/client/params/params.cpp | 2 +- ydb/public/sdk/cpp/src/client/value/value.cpp | 445 ++++++++--------- 8 files changed, 230 insertions(+), 754 deletions(-) diff --git a/ydb/public/lib/ydb_cli/common/csv_parser.cpp b/ydb/public/lib/ydb_cli/common/csv_parser.cpp index badf392356b9..2297139e47ab 100644 --- a/ydb/public/lib/ydb_cli/common/csv_parser.cpp +++ b/ydb/public/lib/ydb_cli/common/csv_parser.cpp @@ -396,414 +396,6 @@ class TCsvToYdbConverter { }; - -// TODO: merge with the impl above -class TCsvToYdbOnArenaConverter { -public: - explicit TCsvToYdbOnArenaConverter(TTypeParser& parser, const std::optional& nullValue, google::protobuf::Arena* arena) - : Parser(parser) - , NullValue(nullValue) - , Builder(arena) {} - - template && std::is_signed_v, std::nullptr_t> = nullptr> - static i64 StringToArithmetic(const TString& token, size_t& cnt) { - return std::stoll(token, &cnt); - } - - template && std::is_unsigned_v, std::nullptr_t> = nullptr> - static ui64 StringToArithmetic(const TString& token, size_t& cnt) { - return std::stoull(token, &cnt); - } - - template , std::nullptr_t> = nullptr> - static float StringToArithmetic(const TString& token, size_t& cnt) { - return std::stof(token, &cnt); - } - - template , std::nullptr_t> = nullptr> - static double StringToArithmetic(const TString& token, size_t& cnt) { - return std::stod(token, &cnt); - } - - template - T GetArithmetic(const TString& token) const { - size_t cnt; - try { - auto value = StringToArithmetic(token, cnt); - if (cnt != token.size() || value < std::numeric_limits::lowest() || value > std::numeric_limits::max()) { - throw yexception(); - } - return static_cast(value); - } catch (std::exception& e) { - throw TCsvParseException() << "Expected " << Parser.GetPrimitive() << " value, received: \"" << token << "\"."; - } - } - - void BuildPrimitive(const TString& token) { - switch (Parser.GetPrimitive()) { - case EPrimitiveType::Int8: - Builder.Int8(GetArithmetic(token)); - break; - case EPrimitiveType::Int16: - Builder.Int16(GetArithmetic(token)); - break; - case EPrimitiveType::Int32: - Builder.Int32(GetArithmetic(token)); - break; - case EPrimitiveType::Int64: - Builder.Int64(GetArithmetic(token)); - break; - case EPrimitiveType::Uint8: - Builder.Uint8(GetArithmetic(token)); - break; - case EPrimitiveType::Uint16: - Builder.Uint16(GetArithmetic(token)); - break; - case EPrimitiveType::Uint32: - Builder.Uint32(GetArithmetic(token)); - break; - case EPrimitiveType::Uint64: - Builder.Uint64(GetArithmetic(token)); - break; - case EPrimitiveType::Bool: - Builder.Bool(GetBool(token)); - break; - case EPrimitiveType::String: - Builder.String(token); - break; - case EPrimitiveType::Utf8: - Builder.Utf8(token); - break; - case EPrimitiveType::Json: - Builder.Json(token); - break; - case EPrimitiveType::JsonDocument: - Builder.JsonDocument(token); - break; - case EPrimitiveType::Yson: - Builder.Yson(token); - break; - case EPrimitiveType::Uuid: - Builder.Uuid(TUuidValue{token}); - break; - case EPrimitiveType::Float: - Builder.Float(GetArithmetic(token)); - break; - case EPrimitiveType::Double: - Builder.Double(GetArithmetic(token)); - break; - case EPrimitiveType::DyNumber: - Builder.DyNumber(token); - break; - case EPrimitiveType::Date: { - TInstant date; - if (!TInstant::TryParseIso8601(token, date)) { - date = TInstant::Days(GetArithmetic(token)); - } - Builder.Date(date); - break; - } - case EPrimitiveType::Datetime: { - TInstant datetime; - if (!TInstant::TryParseIso8601(token, datetime)) { - datetime = TInstant::Seconds(GetArithmetic(token)); - } - Builder.Datetime(datetime); - break; - } - case EPrimitiveType::Timestamp: { - TInstant timestamp; - if (!TInstant::TryParseIso8601(token, timestamp)) { - timestamp = TInstant::MicroSeconds(GetArithmetic(token)); - } - Builder.Timestamp(timestamp); - break; - } - case EPrimitiveType::Interval: - Builder.Interval(GetArithmetic(token)); - break; - case EPrimitiveType::Date32: { - TInstant date; - if (TInstant::TryParseIso8601(token, date)) { - Builder.Date32(date.Days()); - } else { - Builder.Date32(GetArithmetic(token)); - } - break; - } - case EPrimitiveType::Datetime64: { - TInstant date; - if (TInstant::TryParseIso8601(token, date)) { - Builder.Datetime64(date.Seconds()); - } else { - Builder.Datetime64(GetArithmetic(token)); - } - break; - } - case EPrimitiveType::Timestamp64: { - TInstant date; - if (TInstant::TryParseIso8601(token, date)) { - Builder.Timestamp64(date.MicroSeconds()); - } else { - Builder.Timestamp64(GetArithmetic(token)); - } - break; - } - case EPrimitiveType::Interval64: - Builder.Interval64(GetArithmetic(token)); - break; - case EPrimitiveType::TzDate: - Builder.TzDate(token); - break; - case EPrimitiveType::TzDatetime: - Builder.TzDatetime(token); - break; - case EPrimitiveType::TzTimestamp: - Builder.TzTimestamp(token); - break; - default: - throw TCsvParseException() << "Unsupported primitive type: " << Parser.GetPrimitive(); - } - } - - void BuildValue(const TStringBuf& token) { - switch (Parser.GetKind()) { - case TTypeParser::ETypeKind::Primitive: { - BuildPrimitive(std::string{token}); - break; - } - case TTypeParser::ETypeKind::Decimal: { - Builder.Decimal(TDecimalValue(std::string(token), Parser.GetDecimal().Precision, Parser.GetDecimal().Scale)); - break; - } - case TTypeParser::ETypeKind::Optional: { - Parser.OpenOptional(); - if (NullValue && token == NullValue) { - Builder.EmptyOptional(GetType()); - } else { - Builder.BeginOptional(); - BuildValue(token); - Builder.EndOptional(); - } - Parser.CloseOptional(); - break; - } - case TTypeParser::ETypeKind::Null: { - EnsureNull(token); - break; - } - case TTypeParser::ETypeKind::Void: { - EnsureNull(token); - break; - } - case TTypeParser::ETypeKind::Tagged: { - Parser.OpenTagged(); - Builder.BeginTagged(Parser.GetTag()); - BuildValue(token); - Builder.EndTagged(); - Parser.CloseTagged(); - break; - } - case TTypeParser::ETypeKind::Pg: { - if (NullValue && token == NullValue) { - Builder.Pg(TPgValue(TPgValue::VK_NULL, {}, Parser.GetPg())); - } else { - Builder.Pg(TPgValue(TPgValue::VK_TEXT, TString(token), Parser.GetPg())); - } - break; - } - default: - throw TCsvParseException() << "Unsupported type kind: " << Parser.GetKind(); - } - } - - void BuildType(TTypeBuilder& typeBuilder) { - switch (Parser.GetKind()) { - case TTypeParser::ETypeKind::Primitive: - typeBuilder.Primitive(Parser.GetPrimitive()); - break; - - case TTypeParser::ETypeKind::Decimal: - typeBuilder.Decimal(Parser.GetDecimal()); - break; - - case TTypeParser::ETypeKind::Optional: - Parser.OpenOptional(); - typeBuilder.BeginOptional(); - BuildType(typeBuilder); - typeBuilder.EndOptional(); - Parser.CloseOptional(); - break; - - case TTypeParser::ETypeKind::Tagged: - Parser.OpenTagged(); - typeBuilder.BeginTagged(Parser.GetTag()); - BuildType(typeBuilder); - typeBuilder.EndTagged(); - Parser.CloseTagged(); - break; - - case TTypeParser::ETypeKind::Pg: - typeBuilder.Pg(Parser.GetPg()); - break; - - default: - throw TCsvParseException() << "Unsupported type kind: " << Parser.GetKind(); - } - } - - TType GetType() { - TTypeBuilder typeBuilder; - BuildType(typeBuilder); - return typeBuilder.Build(); - } - - bool GetBool(const TString& token) const { - if (token == "true") { - return true; - } - if (token == "false") { - return false; - } - throw TCsvParseException() << "Expected bool value: \"true\" or \"false\", received: \"" << token << "\"."; - } - - void EnsureNull(const TStringBuf& token) const { - if (!NullValue) { - throw TCsvParseException() << "Expected null value instead of \"" << token << "\", but null value is not set."; - } - if (token != NullValue) { - throw TCsvParseException() << "Expected null value: \"" << NullValue << "\", received: \"" << token << "\"."; - } - } - - template - bool TryParseArithmetic(const TString& token) const { - size_t cnt; - try { - auto value = StringToArithmetic(token, cnt); - if (cnt != token.size() || value < std::numeric_limits::lowest() || value > std::numeric_limits::max()) { - return false; - } - } catch (std::exception& e) { - return false; - } - return true; - } - - bool TryParseBool(const TString& token) const { - TString tokenLowerCase = to_lower(token); - return tokenLowerCase == "true" || tokenLowerCase == "false"; - } - - bool TryParsePrimitive(const TString& token) { - switch (Parser.GetPrimitive()) { - case EPrimitiveType::Uint8: - return TryParseArithmetic(token) && !token.StartsWith('-'); - case EPrimitiveType::Uint16: - return TryParseArithmetic(token) && !token.StartsWith('-');; - case EPrimitiveType::Uint32: - return TryParseArithmetic(token) && !token.StartsWith('-');; - case EPrimitiveType::Uint64: - return TryParseArithmetic(token) && !token.StartsWith('-');; - case EPrimitiveType::Int8: - return TryParseArithmetic(token); - case EPrimitiveType::Int16: - return TryParseArithmetic(token); - case EPrimitiveType::Int32: - return TryParseArithmetic(token); - case EPrimitiveType::Int64: - return TryParseArithmetic(token); - case EPrimitiveType::Bool: - return TryParseBool(token); - case EPrimitiveType::Json: - return token.StartsWith('{') && token.EndsWith('}'); - break; - case EPrimitiveType::JsonDocument: - break; - case EPrimitiveType::Yson: - break; - case EPrimitiveType::Uuid: - static std::regex uuidRegexTemplate("[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}"); - return std::regex_match(token.c_str(), uuidRegexTemplate); - case EPrimitiveType::Float: - return TryParseArithmetic(token); - case EPrimitiveType::Double: - return TryParseArithmetic(token); - case EPrimitiveType::DyNumber: - break; - case EPrimitiveType::Date: { - TInstant date; - return TInstant::TryParseIso8601(token, date) && token.length() <= 10; - } - case EPrimitiveType::Datetime: { - TInstant datetime; - return TInstant::TryParseIso8601(token, datetime) && token.length() <= 19; - } - case EPrimitiveType::Timestamp: { - TInstant timestamp; - return TInstant::TryParseIso8601(token, timestamp) || TryParseArithmetic(token); - } - case EPrimitiveType::Interval: - break; - case EPrimitiveType::Date32: { - TInstant date; - return TInstant::TryParseIso8601(token, date) || TryParseArithmetic(token); - } - case EPrimitiveType::Datetime64: { - TInstant date; - return TInstant::TryParseIso8601(token, date) || TryParseArithmetic(token); - } - case EPrimitiveType::Timestamp64: { - TInstant date; - return TInstant::TryParseIso8601(token, date) || TryParseArithmetic(token); - } - case EPrimitiveType::Interval64: - return TryParseArithmetic(token); - case EPrimitiveType::TzDate: - break; - case EPrimitiveType::TzDatetime: - break; - case EPrimitiveType::TzTimestamp: - break; - default: - throw TCsvParseException() << "Unsupported primitive type: " << Parser.GetPrimitive(); - } - return false; - } - - bool TryParseValue(const TStringBuf& token, TPossibleType& possibleType) { - if (NullValue && token == NullValue) { - possibleType.SetHasNulls(true); - return true; - } - possibleType.SetHasNonNulls(true); - switch (Parser.GetKind()) { - case TTypeParser::ETypeKind::Primitive: { - return TryParsePrimitive(TString(token)); - } - case TTypeParser::ETypeKind::Decimal: { - break; - } - default: - throw TCsvParseException() << "Unsupported type kind: " << Parser.GetKind(); - } - return false; - } - - TArenaAllocatedValue Convert(const TStringBuf& token) { - BuildValue(token); - return Builder.BuildArenaAllocatedValue(); - } - -private: - TTypeParser& Parser; - const std::optional NullValue = ""; - TArenaAllocatedValueBuilder Builder; -}; - - - TCsvParseException FormatError(const std::exception& inputError, const TCsvParser::TParseMetadata& meta, const std::optional& columnName = {}) { @@ -834,23 +426,6 @@ TValue FieldToValue(TTypeParser& parser, } } -TArenaAllocatedValue FieldToValueOnArena( - TTypeParser& parser, - const TStringBuf& token, - const std::optional& nullValue, - const TCsvParser::TParseMetadata& meta, - const std::string& columnName, - google::protobuf::Arena* arena -) { - Y_ASSERT(arena != nullptr); - try { - TCsvToYdbOnArenaConverter converter(parser, nullValue, arena); - return converter.Convert(token); - } catch (std::exception& e) { - throw FormatError(e, meta, columnName); - } -} - TStringBuf Consume(NCsvFormat::CsvSplitter& splitter, const TCsvParser::TParseMetadata& meta, const TString& columnName) { @@ -966,7 +541,7 @@ TValue TCsvParser::BuildList(const std::vector& lines, const TString& f listItems->Reserve(lines.size()); for (const auto& line : lines) { - ProcessCsvLine(line, listItems, columnTypeParsers, row, filename, /* arena = */ nullptr); + ProcessCsvLine(line, listItems, columnTypeParsers, row, filename); if (row.has_value()) { ++row.value(); } @@ -998,7 +573,7 @@ TArenaAllocatedValue TCsvParser::BuildListOnArena( listItems->Reserve(lines.size()); for (const auto& line : lines) { - ProcessCsvLine(line, listItems, columnTypeParsers, row, filename, arena); + ProcessCsvLine(line, listItems, columnTypeParsers, row, filename); if (row.has_value()) { ++row.value(); } @@ -1016,8 +591,7 @@ void TCsvParser::ProcessCsvLine( google::protobuf::RepeatedPtrField* listItems, const std::vector>& columnTypeParsers, std::optional currentRow, - const TString& filename, - google::protobuf::Arena* arena + const TString& filename ) const { NCsvFormat::CsvSplitter splitter(line, Delimeter); auto* structItems = listItems->Add()->mutable_items(); @@ -1035,28 +609,8 @@ void TCsvParser::ProcessCsvLine( } TStringBuf nextField = Consume(splitter, meta, *headerIt); if (!*skipIt) { - // TODO: create Ydb::Value on the arena to avoid copying here - // TODO: (despite std::move, we copy from non-arena allocated Ydb::Value into arena-allocated structItems) - if (arena != nullptr) { - TArenaAllocatedValue builtValue = FieldToValueOnArena( - *typeParserIt->get(), nextField, NullValue, meta, *headerIt, arena); - - // if (print.load()) { - // Cerr << "arena: " << static_cast(arena) << Endl; - // Cerr << "builtValue.GetProto()->GetArena(): " << static_cast(builtValue.GetProto()->GetArena()) << Endl; - // Cerr << "structItems->GetArena(): " << static_cast(structItems->GetArena()) << Endl; - // Cerr << "structItems->Add()->GetArena(): " << static_cast(structItems->Add()->GetArena()) << Endl; - // print.store(false); - // } - // Y_ASSERT(arena != nullptr); - // Y_ASSERT(builtValue.GetProto()->GetArena() != nullptr); - // Y_ASSERT(builtValue.GetProto()->GetArena() == structItems->GetArena()); - - *structItems->Add() = std::move(builtValue).ExtractProto(); - } else { - TValue builtValue = FieldToValue(*typeParserIt->get(), nextField, NullValue, meta, *headerIt); - *structItems->Add() = std::move(builtValue).ExtractProto(); - } + TValue builtValue = FieldToValue(*typeParserIt->get(), nextField, NullValue, meta, *headerIt); + *structItems->Add() = std::move(builtValue).ExtractProto(); ++typeParserIt; } diff --git a/ydb/public/lib/ydb_cli/common/csv_parser.h b/ydb/public/lib/ydb_cli/common/csv_parser.h index fc2cc6107663..bc5902fa013b 100644 --- a/ydb/public/lib/ydb_cli/common/csv_parser.h +++ b/ydb/public/lib/ydb_cli/common/csv_parser.h @@ -78,8 +78,7 @@ class TCsvParser { google::protobuf::RepeatedPtrField* listItems, const std::vector>& columnTypeParsers, std::optional currentRow, - const TString& filename, - google::protobuf::Arena* arena + const TString& filename ) const; }; diff --git a/ydb/public/sdk/cpp/client/ydb_value/fwd.h b/ydb/public/sdk/cpp/client/ydb_value/fwd.h index d1b26e0091fa..f9f5df1abf5c 100644 --- a/ydb/public/sdk/cpp/client/ydb_value/fwd.h +++ b/ydb/public/sdk/cpp/client/ydb_value/fwd.h @@ -8,7 +8,6 @@ class TTypeBuilder; class TValue; class TValueParser; class TValueBuilder; -class TArenaAllocatedValueBuilder; template class TValueBuilderBase; diff --git a/ydb/public/sdk/cpp/include/ydb-cpp-sdk/client/params/params.h b/ydb/public/sdk/cpp/include/ydb-cpp-sdk/client/params/params.h index 31a0d904a821..71749a9c1adb 100644 --- a/ydb/public/sdk/cpp/include/ydb-cpp-sdk/client/params/params.h +++ b/ydb/public/sdk/cpp/include/ydb-cpp-sdk/client/params/params.h @@ -60,7 +60,7 @@ class TParams { std::shared_ptr Impl_; }; -class TParamValueBuilder : public TValueBuilderBase { +class TParamValueBuilder : public TValueBuilderBase { friend class TParamsBuilder; public: TParamsBuilder& Build(); diff --git a/ydb/public/sdk/cpp/include/ydb-cpp-sdk/client/value/fwd.h b/ydb/public/sdk/cpp/include/ydb-cpp-sdk/client/value/fwd.h index 24a6913987b7..06eb71ac0703 100644 --- a/ydb/public/sdk/cpp/include/ydb-cpp-sdk/client/value/fwd.h +++ b/ydb/public/sdk/cpp/include/ydb-cpp-sdk/client/value/fwd.h @@ -8,9 +8,8 @@ class TTypeBuilder; class TValue; class TValueParser; class TValueBuilder; -class TArenaAllocatedValueBuilder; -template +template class TValueBuilderBase; } // namespace NYdb diff --git a/ydb/public/sdk/cpp/include/ydb-cpp-sdk/client/value/value.h b/ydb/public/sdk/cpp/include/ydb-cpp-sdk/client/value/value.h index 5883ee6f8d29..7e5702b5d3f4 100644 --- a/ydb/public/sdk/cpp/include/ydb-cpp-sdk/client/value/value.h +++ b/ydb/public/sdk/cpp/include/ydb-cpp-sdk/client/value/value.h @@ -174,7 +174,6 @@ std::string FormatType(const TType& type); //! To create complex type, corresponding scope should be opened by Begin*/End* calls //! To create complex repeated type, Add* should be called at least once class TTypeBuilder : public TMoveOnly { - template friend class TValueBuilderImpl; public: TTypeBuilder(TTypeBuilder&&); @@ -295,6 +294,7 @@ class TValue { std::shared_ptr Impl_; }; + class TArenaAllocatedValue { public: TArenaAllocatedValue(const TType& type, Ydb::Value* value); @@ -313,6 +313,8 @@ class TArenaAllocatedValue { }; + + class TValueParser : public TMoveOnly { friend class TResultSetParser; public: @@ -432,59 +434,10 @@ class TValueParser : public TMoveOnly { std::unique_ptr Impl_; }; -template class TValueBuilderImpl; -class StackAllocatedValueHolder { -public: - Ydb::Value& Value() { - return ProtoValue_; - } - - Ydb::Value* ValuePtr() { - return &ProtoValue_; - } - -private: - Ydb::Value ProtoValue_; -}; - -class ArenaAllocatedValueHolder { -public: - ArenaAllocatedValueHolder(google::protobuf::Arena* arena) - // value is created lazily on first access - : ArenaAllocatedValue_(nullptr) - , Arena_(arena) { - Y_ASSERT(arena != nullptr); - } - - Ydb::Value& Value() { - createValueIfAbsent(); - return *ArenaAllocatedValue_; - } - - Ydb::Value* ValuePtr() { - createValueIfAbsent(); - return ArenaAllocatedValue_; - } - -private: - void createValueIfAbsent() { - if (ArenaAllocatedValue_ == nullptr) { - ArenaAllocatedValue_ = google::protobuf::Arena::CreateMessage(Arena_); - } - } - -private: - Ydb::Value* ArenaAllocatedValue_; - google::protobuf::Arena* Arena_; -}; - - - - -template +template class TValueBuilderBase : public TMoveOnly { friend TDerived; public: @@ -597,21 +550,21 @@ class TValueBuilderBase : public TMoveOnly { protected: TValueBuilderBase(TValueBuilderBase&&); - TValueBuilderBase(ValueHolder holder); + TValueBuilderBase(); - TValueBuilderBase(ValueHolder holder, const TType& type); + TValueBuilderBase(const TType& type); - TValueBuilderBase(ValueHolder holder, Ydb::Type& type, Ydb::Value& value); + TValueBuilderBase(Ydb::Type& type, Ydb::Value& value); ~TValueBuilderBase(); void CheckValue(); private: - std::unique_ptr> Impl_; + std::unique_ptr Impl_; }; -class TValueBuilder : public TValueBuilderBase { +class TValueBuilder : public TValueBuilderBase { public: TValueBuilder(); @@ -620,13 +573,6 @@ class TValueBuilder : public TValueBuilderBase { -public: - TArenaAllocatedValueBuilder(google::protobuf::Arena* arena); - - TArenaAllocatedValue BuildArenaAllocatedValue(); -}; - } // namespace NYdb template<> diff --git a/ydb/public/sdk/cpp/src/client/params/params.cpp b/ydb/public/sdk/cpp/src/client/params/params.cpp index 87aec4f6e38a..25cb1a71c93c 100644 --- a/ydb/public/sdk/cpp/src/client/params/params.cpp +++ b/ydb/public/sdk/cpp/src/client/params/params.cpp @@ -130,7 +130,7 @@ class TParamsBuilder::TImpl { //////////////////////////////////////////////////////////////////////////////// TParamValueBuilder::TParamValueBuilder(TParamsBuilder& owner, Ydb::Type& typeProto, Ydb::Value& valueProto) - : TValueBuilderBase(StackAllocatedValueHolder(), typeProto, valueProto) + : TValueBuilderBase(typeProto, valueProto) , Owner_(owner) , Finished_(false) {} diff --git a/ydb/public/sdk/cpp/src/client/value/value.cpp b/ydb/public/sdk/cpp/src/client/value/value.cpp index d3fa0ab97eb5..8626e4b709de 100644 --- a/ydb/public/sdk/cpp/src/client/value/value.cpp +++ b/ydb/public/sdk/cpp/src/client/value/value.cpp @@ -2066,8 +2066,6 @@ void TValueParser::CloseTagged() { //////////////////////////////////////////////////////////////////////////////// -// TODO: if TValueArenaBuilderImpl implemented, we can avoid copying Ydb::Value in TCsvParser::BuildListOnArena when FieldToValue is called -template class TValueBuilderImpl { using ETypeKind = TTypeParser::ETypeKind; using TMembersMap = std::map; @@ -2091,24 +2089,21 @@ class TValueBuilderImpl { }; public: - TValueBuilderImpl(ValueHolder holder) + TValueBuilderImpl() : TypeBuilder_() - , ValueHolder_(std::move(holder)) { - PushPath(ValueHolder_.Value()); + PushPath(ProtoValue_); } - TValueBuilderImpl(ValueHolder holder, const TType& type) + TValueBuilderImpl(const TType& type) : TypeBuilder_() - , ValueHolder_(std::move(holder)) { - PushPath(ValueHolder_.Value()); + PushPath(ProtoValue_); GetType().CopyFrom(type.GetProto()); } - TValueBuilderImpl(ValueHolder holder, Ydb::Type& type, Ydb::Value& value) + TValueBuilderImpl(Ydb::Type& type, Ydb::Value& value) : TypeBuilder_(type) - , ValueHolder_(std::move(holder)) { PushPath(value); } @@ -2120,20 +2115,15 @@ class TValueBuilderImpl { } } - TValue BuildStackAllocatedValue() { + TValue BuildValue() { CheckValue(); Ydb::Value value; - value.Swap(&ValueHolder_.Value()); + value.Swap(&ProtoValue_); return TValue(TypeBuilder_.Build(), std::move(value)); } - TArenaAllocatedValue BuildArenaAllocatedValue() { - CheckValue(); - return TArenaAllocatedValue(TypeBuilder_.Build(), ValueHolder_.ValuePtr()); - } - void Bool(bool value) { FillPrimitiveType(EPrimitiveType::Bool); GetValue().set_bool_value(value); @@ -2843,8 +2833,7 @@ class TValueBuilderImpl { private: //TTypeBuilder TypeBuilder_; TTypeBuilder::TImpl TypeBuilder_; - // Ydb::Value ProtoValue_; - ValueHolder ValueHolder_; + Ydb::Value ProtoValue_; std::map StructsMap_; TStackVec Path_; @@ -2853,211 +2842,211 @@ class TValueBuilderImpl { //////////////////////////////////////////////////////////////////////////////// -template -TValueBuilderBase::TValueBuilderBase(TValueBuilderBase&&) = default; +template +TValueBuilderBase::TValueBuilderBase(TValueBuilderBase&&) = default; -template -TValueBuilderBase::~TValueBuilderBase() = default; +template +TValueBuilderBase::~TValueBuilderBase() = default; -template -TValueBuilderBase::TValueBuilderBase(ValueHolder holder) - : Impl_(new TValueBuilderImpl(std::move(holder))) {} +template +TValueBuilderBase::TValueBuilderBase() + : Impl_(new TValueBuilderImpl()) {} -template -TValueBuilderBase::TValueBuilderBase(ValueHolder holder, const TType& type) - : Impl_(new TValueBuilderImpl(std::move(holder), type)) {} +template +TValueBuilderBase::TValueBuilderBase(const TType& type) + : Impl_(new TValueBuilderImpl(type)) {} -template -TValueBuilderBase::TValueBuilderBase(ValueHolder holder, Ydb::Type& type, Ydb::Value& value) - : Impl_(new TValueBuilderImpl(std::move(holder), type, value)) {} +template +TValueBuilderBase::TValueBuilderBase(Ydb::Type& type, Ydb::Value& value) + : Impl_(new TValueBuilderImpl(type, value)) {} -template -void TValueBuilderBase::CheckValue() { +template +void TValueBuilderBase::CheckValue() { return Impl_->CheckValue(); } -template -TDerived& TValueBuilderBase::Bool(bool value) { +template +TDerived& TValueBuilderBase::Bool(bool value) { Impl_->Bool(value); return static_cast(*this); } -template -TDerived& TValueBuilderBase::Int8(i8 value) { +template +TDerived& TValueBuilderBase::Int8(i8 value) { Impl_->Int8(value); return static_cast(*this); } -template -TDerived& TValueBuilderBase::Uint8(ui8 value) { +template +TDerived& TValueBuilderBase::Uint8(ui8 value) { Impl_->Uint8(value); return static_cast(*this); } -template -TDerived& TValueBuilderBase::Int16(i16 value) { +template +TDerived& TValueBuilderBase::Int16(i16 value) { Impl_->Int16(value); return static_cast(*this); } -template -TDerived& TValueBuilderBase::Uint16(ui16 value) { +template +TDerived& TValueBuilderBase::Uint16(ui16 value) { Impl_->Uint16(value); return static_cast(*this); } -template -TDerived& TValueBuilderBase::Int32(i32 value) { +template +TDerived& TValueBuilderBase::Int32(i32 value) { Impl_->Int32(value); return static_cast(*this); } -template -TDerived& TValueBuilderBase::Uint32(ui32 value) { +template +TDerived& TValueBuilderBase::Uint32(ui32 value) { Impl_->Uint32(value); return static_cast(*this); } -template -TDerived& TValueBuilderBase::Int64(int64_t value) { +template +TDerived& TValueBuilderBase::Int64(int64_t value) { Impl_->Int64(value); return static_cast(*this); } -template -TDerived& TValueBuilderBase::Uint64(uint64_t value) { +template +TDerived& TValueBuilderBase::Uint64(uint64_t value) { Impl_->Uint64(value); return static_cast(*this); } -template -TDerived& TValueBuilderBase::Float(float value) { +template +TDerived& TValueBuilderBase::Float(float value) { Impl_->Float(value); return static_cast(*this); } -template -TDerived& TValueBuilderBase::Double(double value) { +template +TDerived& TValueBuilderBase::Double(double value) { Impl_->Double(value); return static_cast(*this); } -template -TDerived& TValueBuilderBase::Date(const TInstant& value) { +template +TDerived& TValueBuilderBase::Date(const TInstant& value) { Impl_->Date(value); return static_cast(*this); } -template -TDerived& TValueBuilderBase::Datetime(const TInstant& value) { +template +TDerived& TValueBuilderBase::Datetime(const TInstant& value) { Impl_->Datetime(value); return static_cast(*this); } -template -TDerived& TValueBuilderBase::Timestamp(const TInstant& value) { +template +TDerived& TValueBuilderBase::Timestamp(const TInstant& value) { Impl_->Timestamp(value); return static_cast(*this); } -template -TDerived& TValueBuilderBase::Interval(int64_t value) { +template +TDerived& TValueBuilderBase::Interval(int64_t value) { Impl_->Interval(value); return static_cast(*this); } -template -TDerived& TValueBuilderBase::Date32(const i32 value) { +template +TDerived& TValueBuilderBase::Date32(const i32 value) { Impl_->Date32(value); return static_cast(*this); } -template -TDerived& TValueBuilderBase::Datetime64(const int64_t value) { +template +TDerived& TValueBuilderBase::Datetime64(const int64_t value) { Impl_->Datetime64(value); return static_cast(*this); } -template -TDerived& TValueBuilderBase::Timestamp64(const int64_t value) { +template +TDerived& TValueBuilderBase::Timestamp64(const int64_t value) { Impl_->Timestamp64(value); return static_cast(*this); } -template -TDerived& TValueBuilderBase::Interval64(const int64_t value) { +template +TDerived& TValueBuilderBase::Interval64(const int64_t value) { Impl_->Interval64(value); return static_cast(*this); } -template -TDerived& TValueBuilderBase::TzDate(const std::string& value) { +template +TDerived& TValueBuilderBase::TzDate(const std::string& value) { Impl_->TzDate(value); return static_cast(*this); } -template -TDerived& TValueBuilderBase::TzDatetime(const std::string& value) { +template +TDerived& TValueBuilderBase::TzDatetime(const std::string& value) { Impl_->TzDatetime(value); return static_cast(*this); } -template -TDerived& TValueBuilderBase::TzTimestamp(const std::string& value) { +template +TDerived& TValueBuilderBase::TzTimestamp(const std::string& value) { Impl_->TzTimestamp(value); return static_cast(*this); } -template -TDerived& TValueBuilderBase::String(const std::string& value) { +template +TDerived& TValueBuilderBase::String(const std::string& value) { Impl_->String(value); return static_cast(*this); } -template -TDerived& TValueBuilderBase::Utf8(const std::string& value) { +template +TDerived& TValueBuilderBase::Utf8(const std::string& value) { Impl_->Utf8(value); return static_cast(*this); } -template -TDerived& TValueBuilderBase::Yson(const std::string& value) { +template +TDerived& TValueBuilderBase::Yson(const std::string& value) { Impl_->Yson(value); return static_cast(*this); } -template -TDerived& TValueBuilderBase::Json(const std::string& value) { +template +TDerived& TValueBuilderBase::Json(const std::string& value) { Impl_->Json(value); return static_cast(*this); } -template -TDerived& TValueBuilderBase::Uuid(const TUuidValue& value) { +template +TDerived& TValueBuilderBase::Uuid(const TUuidValue& value) { Impl_->Uuid(value); return static_cast(*this); } -template -TDerived& TValueBuilderBase::JsonDocument(const std::string& value) { +template +TDerived& TValueBuilderBase::JsonDocument(const std::string& value) { Impl_->JsonDocument(value); return static_cast(*this); } -template -TDerived& TValueBuilderBase::DyNumber(const std::string& value) { +template +TDerived& TValueBuilderBase::DyNumber(const std::string& value) { Impl_->DyNumber(value); return static_cast(*this); } -template -TDerived& TValueBuilderBase::Decimal(const TDecimalValue& value) { +template +TDerived& TValueBuilderBase::Decimal(const TDecimalValue& value) { Impl_->Decimal(value); return static_cast(*this); } -template -TDerived& TValueBuilderBase::Pg(const TPgValue& value) { +template +TDerived& TValueBuilderBase::Pg(const TPgValue& value) { Impl_->Pg(value); return static_cast(*this); } @@ -3078,369 +3067,359 @@ TDerived& TValueBuilderBase::Pg(const TPgValue& value) { Impl_->EndOptional(); \ return static_cast(*this); -template -TDerived& TValueBuilderBase::OptionalBool(const std::optional& value) { +template +TDerived& TValueBuilderBase::OptionalBool(const std::optional& value) { SET_OPT_VALUE_MAYBE(Bool); } -template -TDerived& TValueBuilderBase::OptionalInt8(const std::optional& value) { +template +TDerived& TValueBuilderBase::OptionalInt8(const std::optional& value) { SET_OPT_VALUE_MAYBE(Int8); } -template -TDerived& TValueBuilderBase::OptionalUint8(const std::optional& value) { +template +TDerived& TValueBuilderBase::OptionalUint8(const std::optional& value) { SET_OPT_VALUE_MAYBE(Uint8); } -template -TDerived& TValueBuilderBase::OptionalInt16(const std::optional& value) { +template +TDerived& TValueBuilderBase::OptionalInt16(const std::optional& value) { SET_OPT_VALUE_MAYBE(Int16); } -template -TDerived& TValueBuilderBase::OptionalUint16(const std::optional& value) { +template +TDerived& TValueBuilderBase::OptionalUint16(const std::optional& value) { SET_OPT_VALUE_MAYBE(Uint16); } -template -TDerived& TValueBuilderBase::OptionalInt32(const std::optional& value) { +template +TDerived& TValueBuilderBase::OptionalInt32(const std::optional& value) { SET_OPT_VALUE_MAYBE(Int32); } -template -TDerived& TValueBuilderBase::OptionalUint32(const std::optional& value) { +template +TDerived& TValueBuilderBase::OptionalUint32(const std::optional& value) { SET_OPT_VALUE_MAYBE(Uint32); } -template -TDerived& TValueBuilderBase::OptionalInt64(const std::optional& value) { +template +TDerived& TValueBuilderBase::OptionalInt64(const std::optional& value) { SET_OPT_VALUE_MAYBE(Int64); } -template -TDerived& TValueBuilderBase::OptionalUint64(const std::optional& value) { +template +TDerived& TValueBuilderBase::OptionalUint64(const std::optional& value) { SET_OPT_VALUE_MAYBE(Uint64); } -template -TDerived& TValueBuilderBase::OptionalFloat(const std::optional& value) { +template +TDerived& TValueBuilderBase::OptionalFloat(const std::optional& value) { SET_OPT_VALUE_MAYBE(Float); } -template -TDerived& TValueBuilderBase::OptionalDouble(const std::optional& value) { +template +TDerived& TValueBuilderBase::OptionalDouble(const std::optional& value) { SET_OPT_VALUE_MAYBE(Double); } -template -TDerived& TValueBuilderBase::OptionalDate(const std::optional& value) { +template +TDerived& TValueBuilderBase::OptionalDate(const std::optional& value) { SET_OPT_VALUE_MAYBE(Date); } -template -TDerived& TValueBuilderBase::OptionalDatetime(const std::optional& value) { +template +TDerived& TValueBuilderBase::OptionalDatetime(const std::optional& value) { SET_OPT_VALUE_MAYBE(Datetime); } -template -TDerived& TValueBuilderBase::OptionalTimestamp(const std::optional& value) { +template +TDerived& TValueBuilderBase::OptionalTimestamp(const std::optional& value) { SET_OPT_VALUE_MAYBE(Timestamp); } -template -TDerived& TValueBuilderBase::OptionalInterval(const std::optional& value) { +template +TDerived& TValueBuilderBase::OptionalInterval(const std::optional& value) { SET_OPT_VALUE_MAYBE(Interval); } -template -TDerived& TValueBuilderBase::OptionalDate32(const std::optional& value) { +template +TDerived& TValueBuilderBase::OptionalDate32(const std::optional& value) { SET_OPT_VALUE_MAYBE(Date32); } -template -TDerived& TValueBuilderBase::OptionalDatetime64(const std::optional& value) { +template +TDerived& TValueBuilderBase::OptionalDatetime64(const std::optional& value) { SET_OPT_VALUE_MAYBE(Datetime64); } -template -TDerived& TValueBuilderBase::OptionalTimestamp64(const std::optional& value) { +template +TDerived& TValueBuilderBase::OptionalTimestamp64(const std::optional& value) { SET_OPT_VALUE_MAYBE(Timestamp64); } -template -TDerived& TValueBuilderBase::OptionalInterval64(const std::optional& value) { +template +TDerived& TValueBuilderBase::OptionalInterval64(const std::optional& value) { SET_OPT_VALUE_MAYBE(Interval64); } -template -TDerived& TValueBuilderBase::OptionalTzDate(const std::optional& value) { +template +TDerived& TValueBuilderBase::OptionalTzDate(const std::optional& value) { SET_OPT_VALUE_MAYBE(TzDate); } -template -TDerived& TValueBuilderBase::OptionalTzDatetime(const std::optional& value) { +template +TDerived& TValueBuilderBase::OptionalTzDatetime(const std::optional& value) { SET_OPT_VALUE_MAYBE(TzDatetime); } -template -TDerived& TValueBuilderBase::OptionalTzTimestamp(const std::optional& value) { +template +TDerived& TValueBuilderBase::OptionalTzTimestamp(const std::optional& value) { SET_OPT_VALUE_MAYBE(TzTimestamp); } -template -TDerived& TValueBuilderBase::OptionalString(const std::optional& value) { +template +TDerived& TValueBuilderBase::OptionalString(const std::optional& value) { SET_OPT_VALUE_MAYBE(String); } -template -TDerived& TValueBuilderBase::OptionalUtf8(const std::optional& value) { +template +TDerived& TValueBuilderBase::OptionalUtf8(const std::optional& value) { SET_OPT_VALUE_MAYBE(Utf8); } -template -TDerived& TValueBuilderBase::OptionalYson(const std::optional& value) { +template +TDerived& TValueBuilderBase::OptionalYson(const std::optional& value) { SET_OPT_VALUE_MAYBE(Yson); } -template -TDerived& TValueBuilderBase::OptionalJson(const std::optional& value) { +template +TDerived& TValueBuilderBase::OptionalJson(const std::optional& value) { SET_OPT_VALUE_MAYBE(Json); } -template -TDerived& TValueBuilderBase::OptionalUuid(const std::optional& value) { +template +TDerived& TValueBuilderBase::OptionalUuid(const std::optional& value) { SET_OPT_VALUE_MAYBE(Uuid); } -template -TDerived& TValueBuilderBase::OptionalJsonDocument(const std::optional& value) { +template +TDerived& TValueBuilderBase::OptionalJsonDocument(const std::optional& value) { SET_OPT_VALUE_MAYBE(JsonDocument); } -template -TDerived& TValueBuilderBase::OptionalDyNumber(const std::optional& value) { +template +TDerived& TValueBuilderBase::OptionalDyNumber(const std::optional& value) { SET_OPT_VALUE_MAYBE(DyNumber); } -template -TDerived& TValueBuilderBase::BeginOptional() { +template +TDerived& TValueBuilderBase::BeginOptional() { Impl_->BeginOptional(); return static_cast(*this); } -template -TDerived& TValueBuilderBase::EndOptional() { +template +TDerived& TValueBuilderBase::EndOptional() { Impl_->EndOptional(); return static_cast(*this); } -template -TDerived& TValueBuilderBase::EmptyOptional(const TType& itemType) { +template +TDerived& TValueBuilderBase::EmptyOptional(const TType& itemType) { Impl_->EmptyOptional(itemType); return static_cast(*this); } -template -TDerived& TValueBuilderBase::EmptyOptional(EPrimitiveType itemType) { +template +TDerived& TValueBuilderBase::EmptyOptional(EPrimitiveType itemType) { Impl_->EmptyOptional(itemType); return static_cast(*this); } -template -TDerived& TValueBuilderBase::EmptyOptional() { +template +TDerived& TValueBuilderBase::EmptyOptional() { Impl_->EmptyOptional(); return static_cast(*this); } -template -TDerived& TValueBuilderBase::BeginList() { +template +TDerived& TValueBuilderBase::BeginList() { Impl_->BeginList(); return static_cast(*this); } -template -TDerived& TValueBuilderBase::EndList() { +template +TDerived& TValueBuilderBase::EndList() { Impl_->EndList(); return static_cast(*this); } -template -TDerived& TValueBuilderBase::AddListItem() { +template +TDerived& TValueBuilderBase::AddListItem() { Impl_->AddListItem(); return static_cast(*this); } -template -TDerived& TValueBuilderBase::AddListItem(const TValue& itemValue) { +template +TDerived& TValueBuilderBase::AddListItem(const TValue& itemValue) { Impl_->AddListItem(itemValue); return static_cast(*this); } -template -TDerived& TValueBuilderBase::AddListItem(TValue&& itemValue) { +template +TDerived& TValueBuilderBase::AddListItem(TValue&& itemValue) { Impl_->AddListItem(std::move(itemValue)); return static_cast(*this); } -template -TDerived& TValueBuilderBase::EmptyList(const TType& itemType) { +template +TDerived& TValueBuilderBase::EmptyList(const TType& itemType) { Impl_->EmptyList(itemType); return static_cast(*this); } -template -TDerived& TValueBuilderBase::EmptyList() { +template +TDerived& TValueBuilderBase::EmptyList() { Impl_->EmptyList(); return static_cast(*this); } -template -TDerived& TValueBuilderBase::BeginStruct() { +template +TDerived& TValueBuilderBase::BeginStruct() { Impl_->BeginStruct(); return static_cast(*this); } -template -TDerived& TValueBuilderBase::AddMember(const std::string& memberName) { +template +TDerived& TValueBuilderBase::AddMember(const std::string& memberName) { Impl_->AddMember(memberName); return static_cast(*this); } -template -TDerived& TValueBuilderBase::AddMember(const std::string& memberName, const TValue& memberValue) { +template +TDerived& TValueBuilderBase::AddMember(const std::string& memberName, const TValue& memberValue) { Impl_->AddMember(memberName, memberValue); return static_cast(*this); } -template -TDerived& TValueBuilderBase::AddMember(const std::string& memberName, TValue&& memberValue) { +template +TDerived& TValueBuilderBase::AddMember(const std::string& memberName, TValue&& memberValue) { Impl_->AddMember(memberName, std::move(memberValue)); return static_cast(*this); } -template -TDerived& TValueBuilderBase::EndStruct() { +template +TDerived& TValueBuilderBase::EndStruct() { Impl_->EndStruct(); return static_cast(*this); } -template -TDerived& TValueBuilderBase::BeginTuple() { +template +TDerived& TValueBuilderBase::BeginTuple() { Impl_->BeginTuple(); return static_cast(*this); } -template -TDerived& TValueBuilderBase::AddElement() { +template +TDerived& TValueBuilderBase::AddElement() { Impl_->AddElement(); return static_cast(*this); } -template -TDerived& TValueBuilderBase::AddElement(const TValue& elementValue) { +template +TDerived& TValueBuilderBase::AddElement(const TValue& elementValue) { Impl_->AddElement(elementValue); return static_cast(*this); } -template -TDerived& TValueBuilderBase::EndTuple() { +template +TDerived& TValueBuilderBase::EndTuple() { Impl_->EndTuple(); return static_cast(*this); } -template -TDerived& TValueBuilderBase::BeginDict() { +template +TDerived& TValueBuilderBase::BeginDict() { Impl_->BeginDict(); return static_cast(*this); } -template -TDerived& TValueBuilderBase::AddDictItem() { +template +TDerived& TValueBuilderBase::AddDictItem() { Impl_->AddDictItem(); return static_cast(*this); } -template -TDerived& TValueBuilderBase::DictKey() { +template +TDerived& TValueBuilderBase::DictKey() { Impl_->DictKey(); return static_cast(*this); } -template -TDerived& TValueBuilderBase::DictKey(const TValue& keyValue) { +template +TDerived& TValueBuilderBase::DictKey(const TValue& keyValue) { Impl_->DictKey(keyValue); return static_cast(*this); } -template -TDerived& TValueBuilderBase::DictPayload() { +template +TDerived& TValueBuilderBase::DictPayload() { Impl_->DictPayload(); return static_cast(*this); } -template -TDerived& TValueBuilderBase::DictPayload(const TValue& payloadValue) { +template +TDerived& TValueBuilderBase::DictPayload(const TValue& payloadValue) { Impl_->DictPayload(payloadValue); return static_cast(*this); } -template -TDerived& TValueBuilderBase::EndDict() { +template +TDerived& TValueBuilderBase::EndDict() { Impl_->EndDict(); return static_cast(*this); } -template -TDerived& TValueBuilderBase::EmptyDict(const TType& keyType, const TType& payloadType) { +template +TDerived& TValueBuilderBase::EmptyDict(const TType& keyType, const TType& payloadType) { Impl_->EmptyDict(keyType, payloadType); return static_cast(*this); } -template -TDerived& TValueBuilderBase::EmptyDict() { +template +TDerived& TValueBuilderBase::EmptyDict() { Impl_->EmptyDict(); return static_cast(*this); } -template -TDerived& TValueBuilderBase::BeginTagged(const std::string& tag) { +template +TDerived& TValueBuilderBase::BeginTagged(const std::string& tag) { Impl_->BeginTagged(tag); return static_cast(*this); } -template -TDerived& TValueBuilderBase::EndTagged() { +template +TDerived& TValueBuilderBase::EndTagged() { Impl_->EndTagged(); return static_cast(*this); } //////////////////////////////////////////////////////////////////////////////// -template class TValueBuilderBase; -template class TValueBuilderBase; -template class TValueBuilderBase; +template class TValueBuilderBase; +template class TValueBuilderBase; //////////////////////////////////////////////////////////////////////////////// TValueBuilder::TValueBuilder() - : TValueBuilderBase(StackAllocatedValueHolder()) {} + : TValueBuilderBase() {} TValueBuilder::TValueBuilder(const TType& type) - : TValueBuilderBase(StackAllocatedValueHolder(), type) {} + : TValueBuilderBase(type) {} TValue TValueBuilder::Build() { - return Impl_->BuildStackAllocatedValue(); -} - -//////////////////////////////////////////////////////////////////////////////// - -TArenaAllocatedValueBuilder::TArenaAllocatedValueBuilder(google::protobuf::Arena* arena) - : TValueBuilderBase(ArenaAllocatedValueHolder(arena)) {} - -TArenaAllocatedValue TArenaAllocatedValueBuilder::BuildArenaAllocatedValue() { - return Impl_->BuildArenaAllocatedValue(); + return Impl_->BuildValue(); } } // namespace NYdb From 518f096ed0d833c95a07f82b3d6ed8a3e4d6f3a1 Mon Sep 17 00:00:00 2001 From: Vladislav Artiukhov Date: Sun, 8 Jun 2025 12:29:16 +0000 Subject: [PATCH 20/41] refactor: remove commented code --- ydb/public/lib/ydb_cli/common/csv_parser.cpp | 2 -- ydb/public/lib/ydb_cli/import/import.cpp | 13 ------------- 2 files changed, 15 deletions(-) diff --git a/ydb/public/lib/ydb_cli/common/csv_parser.cpp b/ydb/public/lib/ydb_cli/common/csv_parser.cpp index 2297139e47ab..934deab2f09b 100644 --- a/ydb/public/lib/ydb_cli/common/csv_parser.cpp +++ b/ydb/public/lib/ydb_cli/common/csv_parser.cpp @@ -583,8 +583,6 @@ TArenaAllocatedValue TCsvParser::BuildListOnArena( return TArenaAllocatedValue(ResultListType.value(), value); } -std::atomic_bool print = true; - // Helper method to process a single CSV line void TCsvParser::ProcessCsvLine( const TString& line, diff --git a/ydb/public/lib/ydb_cli/import/import.cpp b/ydb/public/lib/ydb_cli/import/import.cpp index fdfca16a65c0..d66eede319f8 100644 --- a/ydb/public/lib/ydb_cli/import/import.cpp +++ b/ydb/public/lib/ydb_cli/import/import.cpp @@ -1064,18 +1064,6 @@ TStatus TImportFileClient::TImpl::UpsertCsv(IInputStream& input, }; auto upsertCsvFunc = [&](std::vector&& buffer, ui64 row, std::shared_ptr batchStatus) { - // auto buildFunc = [&, buffer = std::move(buffer), row, this] () mutable { - // try { - // return parser.BuildList(buffer, filePath, row); - // } catch (const std::exception& e) { - // if (!Failed.exchange(true)) { - // ErrorStatus = MakeHolder(MakeStatus(EStatus::INTERNAL_ERROR, e.what())); - // } - // jobInflightManager->ReleaseJob(); - // throw; - // } - // }; - auto buildOnArenaFunc = [&, buffer = std::move(buffer), row, this] (google::protobuf::Arena* arena) mutable { try { return parser.BuildListOnArena(buffer, filePath, arena, row); @@ -1089,7 +1077,6 @@ TStatus TImportFileClient::TImpl::UpsertCsv(IInputStream& input, }; // TODO: create arena here and pass it to UpsertTValueBufferOnArena? - // UpsertTValueBuffer(dbPath, std::move(buildFunc)) UpsertTValueBufferOnArena(dbPath, std::move(buildOnArenaFunc)) .Apply([&, batchStatus](const TAsyncStatus& asyncStatus) { jobInflightManager->ReleaseJob(); From e80069c06caae9fad99b5c58cfc242afeb6ab1e9 Mon Sep 17 00:00:00 2001 From: Vladislav Artiukhov Date: Fri, 28 Mar 2025 19:34:01 +0000 Subject: [PATCH 21/41] feat(TLinesSplitter::ConsumeLine): replace bool negation with quote chars counting Benchmarking results (see commit 62018144348 for details): 1. Previous solution (at commit 62018144348): Elapsed: 17.7 sec. Total read size: 8.33GiB. Average processing speed: 482MiB/s. 2. Current solution: Elapsed: 10.6 sec. Total read size: 8.33GiB. Average processing speed: 804MiB/s (up to 817MiB/s). Cherry-picked the commit: `773a3a5` See: https://github.com/Vladislav0Art/ydb/commit/773a3a5a860d398bba34bac548aaf59e87b4c983 --- library/cpp/string_utils/csv/csv.cpp | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/library/cpp/string_utils/csv/csv.cpp b/library/cpp/string_utils/csv/csv.cpp index fd3d932fcd5c..823c96d6904a 100644 --- a/library/cpp/string_utils/csv/csv.cpp +++ b/library/cpp/string_utils/csv/csv.cpp @@ -1,3 +1,5 @@ +#include + #include "csv.h" TStringBuf NCsvFormat::CsvSplitter::Consume() { @@ -70,11 +72,12 @@ TString NCsvFormat::TLinesSplitter::ConsumeLine() { TString result; TString line; while (Input.ReadLine(line)) { - for (auto it = line.begin(); it != line.end(); ++it) { - if (*it == Quote) { - Escape = !Escape; - } + const size_t quoteCount = std::count(line.cbegin(), line.cend(), Quote); + // TODO: use `& 1`? + if (quoteCount % 2 == 1) { + Escape = !Escape; } + if (!result) { result = line; } else { From 59a10618fa96188978f36242a9ce40fc8af63278 Mon Sep 17 00:00:00 2001 From: Vladislav Artiukhov Date: Sun, 8 Jun 2025 14:16:53 +0000 Subject: [PATCH 22/41] feat(TCsvParser): add method TCsvParser::GetHeaderRow() --- ydb/public/lib/ydb_cli/common/csv_parser.cpp | 4 ++++ ydb/public/lib/ydb_cli/common/csv_parser.h | 1 + 2 files changed, 5 insertions(+) diff --git a/ydb/public/lib/ydb_cli/common/csv_parser.cpp b/ydb/public/lib/ydb_cli/common/csv_parser.cpp index 934deab2f09b..e7b1184c6b6f 100644 --- a/ydb/public/lib/ydb_cli/common/csv_parser.cpp +++ b/ydb/public/lib/ydb_cli/common/csv_parser.cpp @@ -658,6 +658,10 @@ const TVector& TCsvParser::GetHeader() { return Header; } +const TString& TCsvParser::GetHeaderRow() const { + return HeaderRow; +} + } } diff --git a/ydb/public/lib/ydb_cli/common/csv_parser.h b/ydb/public/lib/ydb_cli/common/csv_parser.h index bc5902fa013b..927e43466c47 100644 --- a/ydb/public/lib/ydb_cli/common/csv_parser.h +++ b/ydb/public/lib/ydb_cli/common/csv_parser.h @@ -50,6 +50,7 @@ class TCsvParser { void BuildLineType(); const TVector& GetHeader(); + const TString& GetHeaderRow() const; private: TVector Header; From 204614f56dc5cd707199074af392be81e3f5d988 Mon Sep 17 00:00:00 2001 From: Vladislav Artiukhov Date: Sun, 8 Jun 2025 14:17:43 +0000 Subject: [PATCH 23/41] feat(import): implement upload via Apcahe Arrow with fallback on YDB Types with Protobuf Arena --- ydb/public/lib/ydb_cli/import/import.cpp | 181 ++++++++++++++++++++--- 1 file changed, 161 insertions(+), 20 deletions(-) diff --git a/ydb/public/lib/ydb_cli/import/import.cpp b/ydb/public/lib/ydb_cli/import/import.cpp index d66eede319f8..d845c85ef1a1 100644 --- a/ydb/public/lib/ydb_cli/import/import.cpp +++ b/ydb/public/lib/ydb_cli/import/import.cpp @@ -27,6 +27,7 @@ #include #include #include +#include #include #include @@ -559,6 +560,12 @@ class TImportFileClient::TImpl { TAsyncStatus UpsertTValueBuffer(const TString& dbPath, TValueBuilder& builder); TAsyncStatus UpsertTValueBuffer(const TString& dbPath, std::function&& buildFunc); + TAsyncStatus UpsertTValueBufferParquet( + const TString& dbPath, + std::shared_ptr batch, + const arrow::ipc::IpcWriteOptions& writeOptions + ); + TAsyncStatus UpsertTValueBufferOnArena( const TString& dbPath, std::function&& buildFunc); @@ -944,6 +951,45 @@ TAsyncStatus TImportFileClient::TImpl::UpsertTValueBuffer(const TString& dbPath, } +inline +TAsyncStatus TImportFileClient::TImpl::UpsertTValueBufferParquet( + const TString& dbPath, + std::shared_ptr batch, + const arrow::ipc::IpcWriteOptions& writeOptions +) { + if (!RequestsInflight->try_acquire()) { + if (Settings.Verbose_ && Settings.NewlineDelimited_) { + if (!InformedAboutLimit.exchange(true)) { + Cerr << (TStringBuilder() << "@ (each '@' means max request inflight is reached and a worker thread is waiting for " + "any response from database)" << Endl); + } else { + Cerr << '@'; + } + } + RequestsInflight->acquire(); + } + + auto retryFunc = [parquet = NYdb_cli::NArrow::SerializeBatch(batch, writeOptions), + schema = NYdb_cli::NArrow::SerializeSchema(*batch->schema()), + dbPath](NTable::TTableClient& client) { + return client.BulkUpsert(dbPath, NTable::EDataFormat::ApacheArrow, parquet, schema) + .Apply([](const NTable::TAsyncBulkUpsertResult& result) { + return TStatus(result.GetValueSync()); + }); + }; + + return TableClient->RetryOperation(std::move(retryFunc), RetrySettings) + .Apply([this](const TAsyncStatus& asyncStatus) { + NYdb::TStatus status = asyncStatus.GetValueSync(); + if (!status.IsSuccess()) { + if (!Failed.exchange(true)) { + ErrorStatus = MakeHolder(status); + } + } + RequestsInflight->release(); + return asyncStatus; + }); +} @@ -973,6 +1019,7 @@ TAsyncStatus TImportFileClient::TImpl::UpsertTValueBufferOnArena( // Running heavy building task on processing pool: return NThreading::Async(std::move(buildTValueAndSendRequest), *ProcessingPool); }; + if (!RequestsInflight->try_acquire()) { if (Settings.Verbose_ && Settings.NewlineDelimited_) { if (!InformedAboutLimit.exchange(true)) { @@ -984,6 +1031,7 @@ TAsyncStatus TImportFileClient::TImpl::UpsertTValueBufferOnArena( } RequestsInflight->acquire(); } + return TableClient->RetryOperation(std::move(retryFunc), RetrySettings) .Apply([this](const TAsyncStatus& asyncStatus) { NYdb::TStatus status = asyncStatus.GetValueSync(); @@ -1030,6 +1078,11 @@ TStatus TImportFileClient::TImpl::UpsertCsv(IInputStream& input, ui64 skippedBytes = 0; ui64 nextSkipBorder = VerboseModeStepSize; + std::cerr << "Settings.Header_: " << Settings.Header_ << std::endl; + std::cerr << "Settings.SkipRows_: " << Settings.SkipRows_ << std::endl; + std::cerr << "rowsToSkip: " << rowsToSkip << std::endl; + std::cerr << "row: " << row << std::endl; + TString line; std::vector inFlightRequests; std::vector buffer; @@ -1063,32 +1116,120 @@ TStatus TImportFileClient::TImpl::UpsertCsv(IInputStream& input, } }; + // Note: table = dbPath (path to the table on the server) + auto columns = DbTableInfo->GetTableColumns(); + + std::cerr << "columns: " << columns.size() << std::endl; + for (const auto& column : columns) { + std::cerr << "column: " << column.Name << " " << column.Type.ToString() << std::endl; + } + + const Ydb::Formats::CsvSettings csvSettings = ([this]() { + Ydb::Formats::CsvSettings settings; + settings.set_delimiter(Settings.Delimiter_); + settings.set_header(Settings.Header_); + if (Settings.NullValue_.has_value()) { + settings.set_null_value(Settings.NullValue_.value()); + } + settings.set_skip_rows(Settings.SkipRows_); + return settings; + }()); + + auto writeOptions = arrow::ipc::IpcWriteOptions::Defaults(); + // TODO: what codec to use? ZSTD is used in ydb_workload_import.cpp; use it here as well? + constexpr auto codecType = arrow::Compression::type::UNCOMPRESSED; + writeOptions.codec = *arrow::util::Codec::Create(codecType); + + // determines if we should fallback to YDB Types (TValue with Protobuf arena) when Apache Arrow failed. + // Apache Arrow tends to be faster but doesn't support all YDB types (e.g. TzDatetime). On the other hand, + // YDB Types are slower but support all YDB types. + std::atomic_bool fallbackToYDBTypes = false; + + auto upsertCsvFunc = [&](std::vector&& buffer, ui64 row, std::shared_ptr batchStatus) { - auto buildOnArenaFunc = [&, buffer = std::move(buffer), row, this] (google::protobuf::Arena* arena) mutable { - try { - return parser.BuildListOnArena(buffer, filePath, arena, row); - } catch (const std::exception& e) { - if (!Failed.exchange(true)) { - ErrorStatus = MakeHolder(MakeStatus(EStatus::INTERNAL_ERROR, e.what())); + // by default, we attempt to send data via Apache Arrow, if it fails, we fallback to YDB Types (TValue with Protobuf arena) + if (!fallbackToYDBTypes.load()) { + const i64 estimatedCsvLineLength = (!buffer.empty() ? 2 * buffer.front().size() : 10'000); + TStringBuilder data; + data.reserve((buffer.size() + (Settings.Header_ ? 1 : 0)) * estimatedCsvLineLength); + // insert header if it is present in the given csv file + if (Settings.Header_) { + data << parser.GetHeaderRow() << Endl; + } + data << JoinSeq("\n", buffer) << Endl; + + // I: attempt to send data via Apache Arrow + // if header is present, it is expected to be the first line of the data + TString error; + auto arrowCsv = NKikimr::NFormats::TArrowCSVTable::Create(columns, Settings.Header_); + if (arrowCsv.ok()) { + if (auto batch = arrowCsv->ReadSingleBatch(data, csvSettings, error)) { + if (!error) { + // batch was read successfully, sending data via Apache Arrow + UpsertTValueBufferParquet(dbPath, std::move(batch), writeOptions) + .Apply([&, batchStatus](const TAsyncStatus& asyncStatus) { + jobInflightManager->ReleaseJob(); + if (asyncStatus.GetValueSync().IsSuccess()) { + batchStatus->Completed = true; + if (!FileProgressPool->AddFunc(saveProgressIfAny) && !Failed.exchange(true)) { + ErrorStatus = MakeHolder(MakeStatus(EStatus::INTERNAL_ERROR, + "Couldn't add worker func to save progress")); + } + } + return asyncStatus; + }); + } + else { + error = "Error while reading a batch from Apache Arrow: " + error; + fallbackToYDBTypes.store(true); + } + } + else { + error = "Could not read a batch from Apache Arrow"; + fallbackToYDBTypes.store(true); } - jobInflightManager->ReleaseJob(); - throw; } - }; + else { + error = arrowCsv.status().ToString(); + fallbackToYDBTypes.store(true); + } + + // print the error if fallback requested + if (fallbackToYDBTypes.load()) { + std::cerr << "Error when trying to import via Apache Arrow: '" << error << "'.\n" + << "Falling back to import via YDB Types (no action needed from your side)." << std::endl; + } + } - // TODO: create arena here and pass it to UpsertTValueBufferOnArena? - UpsertTValueBufferOnArena(dbPath, std::move(buildOnArenaFunc)) - .Apply([&, batchStatus](const TAsyncStatus& asyncStatus) { - jobInflightManager->ReleaseJob(); - if (asyncStatus.GetValueSync().IsSuccess()) { - batchStatus->Completed = true; - if (!FileProgressPool->AddFunc(saveProgressIfAny) && !Failed.exchange(true)) { - ErrorStatus = MakeHolder(MakeStatus(EStatus::INTERNAL_ERROR, - "Couldn't add worker func to save progress")); + // check if fallback requested (after first Apache Arrow failure, we will always fallback to YDB Types) + if (fallbackToYDBTypes.load()) { + // II: fallback to TValue with Protobuf arena because Apache Arrow failed + auto buildOnArenaFunc = [&, buffer = std::move(buffer), row, this] (google::protobuf::Arena* arena) mutable { + try { + return parser.BuildListOnArena(buffer, filePath, arena, row); + } catch (const std::exception& e) { + if (!Failed.exchange(true)) { + ErrorStatus = MakeHolder(MakeStatus(EStatus::INTERNAL_ERROR, e.what())); } + jobInflightManager->ReleaseJob(); + throw; } - return asyncStatus; - }); + }; + + // TODO: create arena here and pass it to UpsertTValueBufferOnArena? + UpsertTValueBufferOnArena(dbPath, std::move(buildOnArenaFunc)) + .Apply([&, batchStatus](const TAsyncStatus& asyncStatus) { + jobInflightManager->ReleaseJob(); + if (asyncStatus.GetValueSync().IsSuccess()) { + batchStatus->Completed = true; + if (!FileProgressPool->AddFunc(saveProgressIfAny) && !Failed.exchange(true)) { + ErrorStatus = MakeHolder(MakeStatus(EStatus::INTERNAL_ERROR, + "Couldn't add worker func to save progress")); + } + } + return asyncStatus; + }); + } }; for (ui32 i = 0; i < rowsToSkip; ++i) { From 08a0bcc9197dbe8a67c46caf9575cdd41942c921 Mon Sep 17 00:00:00 2001 From: Vladislav Artiukhov Date: Sun, 8 Jun 2025 14:27:18 +0000 Subject: [PATCH 24/41] refactor: remove newlines and commented code --- .../sdk/cpp/include/ydb-cpp-sdk/client/value/value.h | 4 ---- .../impl/ydb_internal/grpc_connections/grpc_connections.h | 8 -------- ydb/public/sdk/cpp/src/client/table/impl/table_client.cpp | 7 ------- 3 files changed, 19 deletions(-) diff --git a/ydb/public/sdk/cpp/include/ydb-cpp-sdk/client/value/value.h b/ydb/public/sdk/cpp/include/ydb-cpp-sdk/client/value/value.h index 7e5702b5d3f4..217495e35659 100644 --- a/ydb/public/sdk/cpp/include/ydb-cpp-sdk/client/value/value.h +++ b/ydb/public/sdk/cpp/include/ydb-cpp-sdk/client/value/value.h @@ -312,9 +312,6 @@ class TArenaAllocatedValue { std::shared_ptr Impl_; }; - - - class TValueParser : public TMoveOnly { friend class TResultSetParser; public: @@ -436,7 +433,6 @@ class TValueParser : public TMoveOnly { class TValueBuilderImpl; - template class TValueBuilderBase : public TMoveOnly { friend TDerived; diff --git a/ydb/public/sdk/cpp/src/client/impl/ydb_internal/grpc_connections/grpc_connections.h b/ydb/public/sdk/cpp/src/client/impl/ydb_internal/grpc_connections/grpc_connections.h index 4a457de8642a..3e4c4c2c8c38 100644 --- a/ydb/public/sdk/cpp/src/client/impl/ydb_internal/grpc_connections/grpc_connections.h +++ b/ydb/public/sdk/cpp/src/client/impl/ydb_internal/grpc_connections/grpc_connections.h @@ -473,8 +473,6 @@ class TGRpcConnectionsImpl std::move(context)); } - - template void RunDeferredOnArena( TRequest* request, @@ -528,9 +526,6 @@ class TGRpcConnectionsImpl std::move(context)); } - - - // Run request using discovery endpoint. // Mostly usefull to make calls from credential provider template @@ -587,9 +582,6 @@ class TGRpcConnectionsImpl context); } - - - template void RunDeferredOnArena( TRequest* request, diff --git a/ydb/public/sdk/cpp/src/client/table/impl/table_client.cpp b/ydb/public/sdk/cpp/src/client/table/impl/table_client.cpp index 79a448fe3289..01d3413ba681 100644 --- a/ydb/public/sdk/cpp/src/client/table/impl/table_client.cpp +++ b/ydb/public/sdk/cpp/src/client/table/impl/table_client.cpp @@ -1028,12 +1028,6 @@ TAsyncBulkUpsertResult TTableClient::TImpl::BulkUpsertUnretryableArenaAllocated( const TBulkUpsertSettings& settings ) { auto request = MakeOperationRequestOnArena(settings, arena); - // std::cout << "request->GetArena(): " << request->GetArena() << "\n" - // << "request->mutable_rows()->GetArena(): " << request->mutable_rows()->GetArena() << "\n" - // << "Value->GetArena(): " << rows.second->GetArena() << "\n" - // << "given arena: " << arena << std::endl; - // assert(request->GetArena() == arena); - // assert(request->mutable_rows()->GetArena() == arena); request->set_table(TStringType{table}); // TODO: Ydb::Type still gets copied because request is arena-allocated and rows' Type is not *request->mutable_rows()->mutable_type() = std::move(rows.GetType()).ExtractProto(); @@ -1063,7 +1057,6 @@ TAsyncBulkUpsertResult TTableClient::TImpl::BulkUpsertUnretryableArenaAllocated( - TAsyncBulkUpsertResult TTableClient::TImpl::BulkUpsert(const std::string& table, TValue&& rows, const TBulkUpsertSettings& settings) { auto request = MakeOperationRequest(settings); request.set_table(TStringType{table}); From dea9dc19509bdab81b775f7930826bdcd4ef2ba4 Mon Sep 17 00:00:00 2001 From: Vladislav Artiukhov Date: Fri, 13 Jun 2025 17:15:54 +0000 Subject: [PATCH 25/41] Revert "feat(TLinesSplitter::ConsumeLine): replace bool negation with quote chars counting" This reverts commit d3961f2ad9091154aed921382b58e9f935a087ab. --- library/cpp/string_utils/csv/csv.cpp | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/library/cpp/string_utils/csv/csv.cpp b/library/cpp/string_utils/csv/csv.cpp index 823c96d6904a..fd3d932fcd5c 100644 --- a/library/cpp/string_utils/csv/csv.cpp +++ b/library/cpp/string_utils/csv/csv.cpp @@ -1,5 +1,3 @@ -#include - #include "csv.h" TStringBuf NCsvFormat::CsvSplitter::Consume() { @@ -72,12 +70,11 @@ TString NCsvFormat::TLinesSplitter::ConsumeLine() { TString result; TString line; while (Input.ReadLine(line)) { - const size_t quoteCount = std::count(line.cbegin(), line.cend(), Quote); - // TODO: use `& 1`? - if (quoteCount % 2 == 1) { - Escape = !Escape; + for (auto it = line.begin(); it != line.end(); ++it) { + if (*it == Quote) { + Escape = !Escape; + } } - if (!result) { result = line; } else { From 9ecf7fc4047e69d8d35d7514f52c4f7a492042e7 Mon Sep 17 00:00:00 2001 From: Vladislav Artiukhov Date: Fri, 13 Jun 2025 17:24:54 +0000 Subject: [PATCH 26/41] refactor: remove redundant includes from value.h --- ydb/public/sdk/cpp/include/ydb-cpp-sdk/client/value/value.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/ydb/public/sdk/cpp/include/ydb-cpp-sdk/client/value/value.h b/ydb/public/sdk/cpp/include/ydb-cpp-sdk/client/value/value.h index 217495e35659..3c03287b8761 100644 --- a/ydb/public/sdk/cpp/include/ydb-cpp-sdk/client/value/value.h +++ b/ydb/public/sdk/cpp/include/ydb-cpp-sdk/client/value/value.h @@ -2,12 +2,10 @@ #include "fwd.h" -#include #include #include #include -#include namespace Ydb { class Type; From c41896ac2ec30726eaf7a8dda9221e28d67ee2bd Mon Sep 17 00:00:00 2001 From: Vladislav Artiukhov Date: Fri, 13 Jun 2025 17:48:13 +0000 Subject: [PATCH 27/41] refactor: remove TArenaAllocatedValue class (use TValue instead) --- ydb/public/lib/ydb_cli/common/csv_parser.cpp | 4 +- ydb/public/lib/ydb_cli/common/csv_parser.h | 2 +- ydb/public/lib/ydb_cli/import/import.cpp | 9 +-- .../include/ydb-cpp-sdk/client/table/table.h | 2 +- .../include/ydb-cpp-sdk/client/value/value.h | 18 +----- .../src/client/table/impl/table_client.cpp | 2 +- .../cpp/src/client/table/impl/table_client.h | 13 +++- ydb/public/sdk/cpp/src/client/table/table.cpp | 5 +- ydb/public/sdk/cpp/src/client/value/value.cpp | 63 ++++++------------- 9 files changed, 45 insertions(+), 73 deletions(-) diff --git a/ydb/public/lib/ydb_cli/common/csv_parser.cpp b/ydb/public/lib/ydb_cli/common/csv_parser.cpp index e7b1184c6b6f..82c7df48c397 100644 --- a/ydb/public/lib/ydb_cli/common/csv_parser.cpp +++ b/ydb/public/lib/ydb_cli/common/csv_parser.cpp @@ -553,7 +553,7 @@ TValue TCsvParser::BuildList(const std::vector& lines, const TString& f -TArenaAllocatedValue TCsvParser::BuildListOnArena( +TValue TCsvParser::BuildListOnArena( const std::vector& lines, const TString& filename, google::protobuf::Arena* arena, @@ -580,7 +580,7 @@ TArenaAllocatedValue TCsvParser::BuildListOnArena( } // Return a TValue that references the arena-allocated message - return TArenaAllocatedValue(ResultListType.value(), value); + return TValue(ResultListType.value(), value); } // Helper method to process a single CSV line diff --git a/ydb/public/lib/ydb_cli/common/csv_parser.h b/ydb/public/lib/ydb_cli/common/csv_parser.h index 927e43466c47..51954c91358b 100644 --- a/ydb/public/lib/ydb_cli/common/csv_parser.h +++ b/ydb/public/lib/ydb_cli/common/csv_parser.h @@ -41,7 +41,7 @@ class TCsvParser { TValue BuildList(const std::vector& lines, const TString& filename, std::optional row = std::nullopt) const; - TArenaAllocatedValue BuildListOnArena( + TValue BuildListOnArena( const std::vector& lines, const TString& filename, google::protobuf::Arena* arena, diff --git a/ydb/public/lib/ydb_cli/import/import.cpp b/ydb/public/lib/ydb_cli/import/import.cpp index d845c85ef1a1..709153ace897 100644 --- a/ydb/public/lib/ydb_cli/import/import.cpp +++ b/ydb/public/lib/ydb_cli/import/import.cpp @@ -567,7 +567,7 @@ class TImportFileClient::TImpl { ); TAsyncStatus UpsertTValueBufferOnArena( - const TString& dbPath, std::function&& buildFunc); + const TString& dbPath, std::function&& buildFunc); TStatus UpsertJson(IInputStream &input, const TString &dbPath, std::optional inputSizeHint, ProgressCallbackFunc & progressCallback); @@ -995,11 +995,11 @@ TAsyncStatus TImportFileClient::TImpl::UpsertTValueBufferParquet( inline TAsyncStatus TImportFileClient::TImpl::UpsertTValueBufferOnArena( - const TString& dbPath, std::function&& buildFunc) { + const TString& dbPath, std::function&& buildFunc) { auto arena = std::make_shared(); // For the first attempt values are built before acquiring request inflight semaphore - std::optional prebuiltValue = buildFunc(arena.get()); + std::optional prebuiltValue = buildFunc(arena.get()); auto retryFunc = [this, &dbPath, buildFunc = std::move(buildFunc), prebuiltValue = std::move(prebuiltValue), arena = std::move(arena)] @@ -1007,8 +1007,9 @@ TAsyncStatus TImportFileClient::TImpl::UpsertTValueBufferOnArena( auto buildTValueAndSendRequest = [this, &buildFunc, &dbPath, &tableClient, &prebuiltValue, arena]() { // For every retry attempt after first request build value from strings again // to prevent copying data in retryFunc in a happy way when there is only one request - TArenaAllocatedValue builtValue = prebuiltValue.has_value() ? std::move(prebuiltValue.value()) : buildFunc(arena.get()); + TValue builtValue = prebuiltValue.has_value() ? std::move(prebuiltValue.value()) : buildFunc(arena.get()); prebuiltValue = std::nullopt; + return tableClient.BulkUpsertUnretryableArenaAllocated( dbPath, std::move(builtValue), arena.get(), UpsertSettings) .Apply([](const NYdb::NTable::TAsyncBulkUpsertResult& bulkUpsertResult) { diff --git a/ydb/public/sdk/cpp/include/ydb-cpp-sdk/client/table/table.h b/ydb/public/sdk/cpp/include/ydb-cpp-sdk/client/table/table.h index f2fdd5a64758..4e488cf18b98 100644 --- a/ydb/public/sdk/cpp/include/ydb-cpp-sdk/client/table/table.h +++ b/ydb/public/sdk/cpp/include/ydb-cpp-sdk/client/table/table.h @@ -1269,7 +1269,7 @@ class TTableClient { TAsyncBulkUpsertResult BulkUpsertUnretryableArenaAllocated( const std::string& table, - TArenaAllocatedValue&& rows, + TValue&& rows, google::protobuf::Arena* arena, const TBulkUpsertSettings& settings ); diff --git a/ydb/public/sdk/cpp/include/ydb-cpp-sdk/client/value/value.h b/ydb/public/sdk/cpp/include/ydb-cpp-sdk/client/value/value.h index 3c03287b8761..bfe5da1ebfe8 100644 --- a/ydb/public/sdk/cpp/include/ydb-cpp-sdk/client/value/value.h +++ b/ydb/public/sdk/cpp/include/ydb-cpp-sdk/client/value/value.h @@ -278,6 +278,7 @@ class TValue { public: TValue(const TType& type, const Ydb::Value& valueProto); TValue(const TType& type, Ydb::Value&& valueProto); + TValue(const TType& type, Ydb::Value* arenaAllocatedValueProto); const TType& GetType() const; TType& GetType(); @@ -293,23 +294,6 @@ class TValue { }; -class TArenaAllocatedValue { -public: - TArenaAllocatedValue(const TType& type, Ydb::Value* value); - - const TType& GetType() const; - TType& GetType(); - - const Ydb::Value* GetProto() const; - Ydb::Value* GetProto(); - - Ydb::Value&& ExtractProto() &&; - -private: - class TImpl; - std::shared_ptr Impl_; -}; - class TValueParser : public TMoveOnly { friend class TResultSetParser; public: diff --git a/ydb/public/sdk/cpp/src/client/table/impl/table_client.cpp b/ydb/public/sdk/cpp/src/client/table/impl/table_client.cpp index 01d3413ba681..6173e13c5540 100644 --- a/ydb/public/sdk/cpp/src/client/table/impl/table_client.cpp +++ b/ydb/public/sdk/cpp/src/client/table/impl/table_client.cpp @@ -1023,7 +1023,7 @@ TAsyncBulkUpsertResult TTableClient::TImpl::BulkUpsert(const std::string& table, TAsyncBulkUpsertResult TTableClient::TImpl::BulkUpsertUnretryableArenaAllocated( const std::string& table, - TArenaAllocatedValue&& rows, + TValue&& rows, google::protobuf::Arena* arena, const TBulkUpsertSettings& settings ) { diff --git a/ydb/public/sdk/cpp/src/client/table/impl/table_client.h b/ydb/public/sdk/cpp/src/client/table/impl/table_client.h index 8ff8d83a3281..322ce30f93a8 100644 --- a/ydb/public/sdk/cpp/src/client/table/impl/table_client.h +++ b/ydb/public/sdk/cpp/src/client/table/impl/table_client.h @@ -144,9 +144,20 @@ class TTableClient::TImpl: public TClientImplCommon, public */ TAsyncBulkUpsertResult BulkUpsertUnretryable(const std::string& table, TValue&& rows, const TBulkUpsertSettings& settings); + /** + * @brief Upsert data into table moving `Ydb::Value` from `rows` into request proto model, + * therefore, it is not retryable with the same `rows` object. + * + * The `Ydb::Value` is expected to be arena-allocated. + * + * See: https://protobuf.dev/reference/cpp/arenas + * + * @param arena The arena to allocate the `Ydb::Value` in. + * @param settings The settings for the bulk upsert operation. + */ TAsyncBulkUpsertResult BulkUpsertUnretryableArenaAllocated( const std::string& table, - TArenaAllocatedValue&& rows, + TValue&& rows, google::protobuf::Arena* arena, const TBulkUpsertSettings& settings ); diff --git a/ydb/public/sdk/cpp/src/client/table/table.cpp b/ydb/public/sdk/cpp/src/client/table/table.cpp index 44ae34a2aad9..ec68dc7d8428 100644 --- a/ydb/public/sdk/cpp/src/client/table/table.cpp +++ b/ydb/public/sdk/cpp/src/client/table/table.cpp @@ -1463,8 +1463,9 @@ TAsyncBulkUpsertResult TTableClient::BulkUpsertUnretryable(const std::string& ta return Impl_->BulkUpsertUnretryable(table, std::move(rows), settings); } -TAsyncBulkUpsertResult TTableClient::BulkUpsertUnretryableArenaAllocated(const std::string& table, - TArenaAllocatedValue&& rows, +TAsyncBulkUpsertResult TTableClient::BulkUpsertUnretryableArenaAllocated( + const std::string& table, + TValue&& rows, google::protobuf::Arena* arena, const TBulkUpsertSettings& settings) { diff --git a/ydb/public/sdk/cpp/src/client/value/value.cpp b/ydb/public/sdk/cpp/src/client/value/value.cpp index 8626e4b709de..94e16990b310 100644 --- a/ydb/public/sdk/cpp/src/client/value/value.cpp +++ b/ydb/public/sdk/cpp/src/client/value/value.cpp @@ -1050,32 +1050,28 @@ class TValue::TImpl { public: TImpl(const TType& type, const Ydb::Value& valueProto) : Type_(type) - , ProtoValue_(valueProto) {} + , ProtoValue_(valueProto) + , ArenaAllocatedValueProto_(nullptr) {} TImpl(const TType& type, Ydb::Value&& valueProto) : Type_(type) - , ProtoValue_(std::move(valueProto)) {} - - TType Type_; - Ydb::Value ProtoValue_; -}; - -/** -* Lifetime of the arena, and hence the `Ydb::Value`, is expected to be managed by the caller. -*/ -class TArenaAllocatedValue::TImpl { -public: - TImpl(const TType& type, Ydb::Value* value) + , ProtoValue_(std::move(valueProto)) + , ArenaAllocatedValueProto_(nullptr) {} + + /** + * Lifetime of the arena, and hence the `Ydb::Value`, is expected to be managed by the caller. + * The `Ydb::Value` is expected to be arena-allocated. + * + * See: https://protobuf.dev/reference/cpp/arenas + */ + TImpl(const TType& type, Ydb::Value* arenaAllocatedValueProto) : Type_(type) - , ArenaAllocatedProtoValue_(value) { - // assert that the value is indeed arena-allocated - Y_ASSERT(ArenaAllocatedProtoValue_ != nullptr); - Y_ASSERT(ArenaAllocatedProtoValue_->GetArena() != nullptr); - } + , ProtoValue_{} + , ArenaAllocatedValueProto_(arenaAllocatedValueProto) {} - // TODO: make Ydb::Type also arena-allocated? TType Type_; - Ydb::Value* ArenaAllocatedProtoValue_; + Ydb::Value ProtoValue_; + Ydb::Value* ArenaAllocatedValueProto_; }; //////////////////////////////////////////////////////////////////////////////// @@ -1086,6 +1082,9 @@ TValue::TValue(const TType& type, const Ydb::Value& valueProto) TValue::TValue(const TType& type, Ydb::Value&& valueProto) : Impl_(new TImpl(type, std::move(valueProto))) {} +TValue::TValue(const TType& type, Ydb::Value* arenaAllocatedValueProto) + : Impl_(new TImpl(type, arenaAllocatedValueProto)) {} + const TType& TValue::GetType() const { return Impl_->Type_; } @@ -1106,30 +1105,6 @@ Ydb::Value&& TValue::ExtractProto() && { return std::move(Impl_->ProtoValue_); } - -TArenaAllocatedValue::TArenaAllocatedValue(const TType& type, Ydb::Value* value) - : Impl_(new TImpl(type, value)) {} - -const TType& TArenaAllocatedValue::GetType() const { - return Impl_->Type_; -} - -TType& TArenaAllocatedValue::GetType() { - return Impl_->Type_; -} - -const Ydb::Value* TArenaAllocatedValue::GetProto() const { - return Impl_->ArenaAllocatedProtoValue_; -} - -Ydb::Value* TArenaAllocatedValue::GetProto() { - return Impl_->ArenaAllocatedProtoValue_; -} - -Ydb::Value&& TArenaAllocatedValue::ExtractProto() && { - return std::move(*Impl_->ArenaAllocatedProtoValue_); -} - //////////////////////////////////////////////////////////////////////////////// class TValueParser::TImpl { From fd4afeeda658dcbeaece474abc6d80aad65dd514 Mon Sep 17 00:00:00 2001 From: Vladislav Artiukhov Date: Fri, 13 Jun 2025 18:16:25 +0000 Subject: [PATCH 28/41] feat: rename unretryable methods to BulkUpsertUnsafe and add docs --- ydb/public/lib/ydb_cli/import/import.cpp | 4 +- .../include/ydb-cpp-sdk/client/table/table.h | 63 +++++++++++++++++-- .../src/client/table/impl/table_client.cpp | 3 +- .../cpp/src/client/table/impl/table_client.h | 21 +------ ydb/public/sdk/cpp/src/client/table/table.cpp | 8 +-- 5 files changed, 69 insertions(+), 30 deletions(-) diff --git a/ydb/public/lib/ydb_cli/import/import.cpp b/ydb/public/lib/ydb_cli/import/import.cpp index 709153ace897..7f39e3682e3a 100644 --- a/ydb/public/lib/ydb_cli/import/import.cpp +++ b/ydb/public/lib/ydb_cli/import/import.cpp @@ -917,7 +917,7 @@ TAsyncStatus TImportFileClient::TImpl::UpsertTValueBuffer(const TString& dbPath, // to prevent copying data in retryFunc in a happy way when there is only one request TValue builtValue = prebuiltValue.has_value() ? std::move(prebuiltValue.value()) : buildFunc(); prebuiltValue = std::nullopt; - return tableClient.BulkUpsertUnretryable(dbPath, std::move(builtValue), UpsertSettings) + return tableClient.BulkUpsertUnretryableUnsafe(dbPath, std::move(builtValue), UpsertSettings) .Apply([](const NYdb::NTable::TAsyncBulkUpsertResult& bulkUpsertResult) { NYdb::TStatus status = bulkUpsertResult.GetValueSync(); return NThreading::MakeFuture(status); @@ -1010,7 +1010,7 @@ TAsyncStatus TImportFileClient::TImpl::UpsertTValueBufferOnArena( TValue builtValue = prebuiltValue.has_value() ? std::move(prebuiltValue.value()) : buildFunc(arena.get()); prebuiltValue = std::nullopt; - return tableClient.BulkUpsertUnretryableArenaAllocated( + return tableClient.BulkUpsertUnretryableArenaAllocatedUnsafe( dbPath, std::move(builtValue), arena.get(), UpsertSettings) .Apply([](const NYdb::NTable::TAsyncBulkUpsertResult& bulkUpsertResult) { NYdb::TStatus status = bulkUpsertResult.GetValueSync(); diff --git a/ydb/public/sdk/cpp/include/ydb-cpp-sdk/client/table/table.h b/ydb/public/sdk/cpp/include/ydb-cpp-sdk/client/table/table.h index 4e488cf18b98..3e1dcf28dc15 100644 --- a/ydb/public/sdk/cpp/include/ydb-cpp-sdk/client/table/table.h +++ b/ydb/public/sdk/cpp/include/ydb-cpp-sdk/client/table/table.h @@ -1261,13 +1261,68 @@ class TTableClient { NThreading::TFuture Stop(); /** - * @brief Upsert data into table moving `Ydb::Value` from `rows` into request proto model, - * therefore, it is not retryable with the same `rows` object. + * @brief Upsert data into table moving `Ydb::Value` from `rows` into request proto model. + * + * This method is not retryable with the same `rows` object because it moves the underlying data + * from the TValue object into the request proto model. The TValue class uses a shared pointer + * to store its implementation data, so when you make a copy of a TValue object, both copies + * share the same underlying data. When this method moves the data from one TValue instance, + * all other instances sharing the same implementation pointer will be left with moved-from data. + * + * Example of incorrect usage: + * ``` + * TValue originalValue = BuildValue(); + * retry { + * TValue copy = originalValue; // Both originalValue and copy share the same data + * client.BulkUpsertUnretryableUnsafe(std::move(copy)); // Moves data from copy, leaving originalValue in moved-from state + * } + * ``` + * CAUTION: It is unsafe to use the instance of `originalValue` because its underlying data is moved! + * + * To safely retry the operation, you need to create a new TValue with fresh data for each attempt. */ - TAsyncBulkUpsertResult BulkUpsertUnretryable(const std::string& table, TValue&& rows, + TAsyncBulkUpsertResult BulkUpsertUnretryableUnsafe(const std::string& table, TValue&& rows, const TBulkUpsertSettings& settings); - TAsyncBulkUpsertResult BulkUpsertUnretryableArenaAllocated( + /** + * @brief Upsert data into table moving `Ydb::Value` from `rows` into request proto model. The `Ydb::Value` of `TValue` must be arena-allocated. + * + * This method is not retryable with the same `rows` object because it moves the underlying data + * from the `TValue` object into the request proto model. The `TValue` class uses a shared pointer + * to store its implementation data, so when you make a copy of a `TValue` object, both copies + * share the same underlying data. When this method moves the data from one `TValue` instance, + * all other instances sharing the same implementation pointer will be left with moved-from data. + * + * IMPORTANT: The `rows` parameter must contain arena-allocated protobuf data. This method + * should only be called when the `TValue`'s underlying data was allocated using the provided + * protobuf arena. Using this method with non-arena-allocated data will lead to undefined behavior. + * It is the caller's responsibility to ensure that the lifetime of the used arena is longer than the lifetime of the `TValue` object during the operation. + * + * @see: https://protobuf.dev/reference/cpp/arenas + * + * Example of incorrect usage: + * ``` + * TValue originalValue = BuildValue(); // Not arena-allocated + * retry { + * TValue copy = originalValue; // Both originalValue and copy share the same data + * client.BulkUpsertUnretryableArenaAllocated(std::move(copy), arena); // WRONG: data not arena-allocated + * } + * ``` + * + * Example of correct usage: + * ``` + * google::protobuf::Arena arena; + * TValue arenaValue = BuildValueOnArena(&arena); // Value built using arena allocation + * client.BulkUpsertUnretryableArenaAllocated(std::move(arenaValue), &arena); + * ``` + * + * To safely retry the operation, you need to create a new `TValue` with fresh arena-allocated data for each attempt. + * + * @param arena The protobuf arena that was used to allocate the data in `rows`. Must be the same + * arena that was used to create the TValue's underlying data. + * @param settings The settings for the bulk upsert operation. + */ + TAsyncBulkUpsertResult BulkUpsertUnretryableArenaAllocatedUnsafe( const std::string& table, TValue&& rows, google::protobuf::Arena* arena, diff --git a/ydb/public/sdk/cpp/src/client/table/impl/table_client.cpp b/ydb/public/sdk/cpp/src/client/table/impl/table_client.cpp index 6173e13c5540..49877979c418 100644 --- a/ydb/public/sdk/cpp/src/client/table/impl/table_client.cpp +++ b/ydb/public/sdk/cpp/src/client/table/impl/table_client.cpp @@ -1021,7 +1021,7 @@ TAsyncBulkUpsertResult TTableClient::TImpl::BulkUpsert(const std::string& table, } -TAsyncBulkUpsertResult TTableClient::TImpl::BulkUpsertUnretryableArenaAllocated( +TAsyncBulkUpsertResult TTableClient::TImpl::BulkUpsertUnretryableArenaAllocatedUnsafe( const std::string& table, TValue&& rows, google::protobuf::Arena* arena, @@ -1029,7 +1029,6 @@ TAsyncBulkUpsertResult TTableClient::TImpl::BulkUpsertUnretryableArenaAllocated( ) { auto request = MakeOperationRequestOnArena(settings, arena); request->set_table(TStringType{table}); - // TODO: Ydb::Type still gets copied because request is arena-allocated and rows' Type is not *request->mutable_rows()->mutable_type() = std::move(rows.GetType()).ExtractProto(); *request->mutable_rows()->mutable_value() = std::move(rows).ExtractProto(); diff --git a/ydb/public/sdk/cpp/src/client/table/impl/table_client.h b/ydb/public/sdk/cpp/src/client/table/impl/table_client.h index 322ce30f93a8..8bcfa7640462 100644 --- a/ydb/public/sdk/cpp/src/client/table/impl/table_client.h +++ b/ydb/public/sdk/cpp/src/client/table/impl/table_client.h @@ -138,24 +138,9 @@ class TTableClient::TImpl: public TClientImplCommon, public void SetStatCollector(const NSdkStats::TStatCollector::TClientStatCollector& collector); - /** - * @brief Upsert data into table moving `Ydb::Value` from `rows` into request proto model, - * therefore, it is not retryable with the same `rows` object. - */ - TAsyncBulkUpsertResult BulkUpsertUnretryable(const std::string& table, TValue&& rows, const TBulkUpsertSettings& settings); - - /** - * @brief Upsert data into table moving `Ydb::Value` from `rows` into request proto model, - * therefore, it is not retryable with the same `rows` object. - * - * The `Ydb::Value` is expected to be arena-allocated. - * - * See: https://protobuf.dev/reference/cpp/arenas - * - * @param arena The arena to allocate the `Ydb::Value` in. - * @param settings The settings for the bulk upsert operation. - */ - TAsyncBulkUpsertResult BulkUpsertUnretryableArenaAllocated( + TAsyncBulkUpsertResult BulkUpsertUnretryableUnsafe(const std::string& table, TValue&& rows, const TBulkUpsertSettings& settings); + + TAsyncBulkUpsertResult BulkUpsertUnretryableArenaAllocatedUnsafe( const std::string& table, TValue&& rows, google::protobuf::Arena* arena, diff --git a/ydb/public/sdk/cpp/src/client/table/table.cpp b/ydb/public/sdk/cpp/src/client/table/table.cpp index ec68dc7d8428..bb50789afb76 100644 --- a/ydb/public/sdk/cpp/src/client/table/table.cpp +++ b/ydb/public/sdk/cpp/src/client/table/table.cpp @@ -1457,19 +1457,19 @@ NThreading::TFuture TTableClient::Stop() { return Impl_->Stop(); } -TAsyncBulkUpsertResult TTableClient::BulkUpsertUnretryable(const std::string& table, TValue&& rows, +TAsyncBulkUpsertResult TTableClient::BulkUpsertUnretryableUnsafe(const std::string& table, TValue&& rows, const TBulkUpsertSettings& settings) { - return Impl_->BulkUpsertUnretryable(table, std::move(rows), settings); + return Impl_->BulkUpsertUnretryableUnsafe(table, std::move(rows), settings); } -TAsyncBulkUpsertResult TTableClient::BulkUpsertUnretryableArenaAllocated( +TAsyncBulkUpsertResult TTableClient::BulkUpsertUnretryableArenaAllocatedUnsafe( const std::string& table, TValue&& rows, google::protobuf::Arena* arena, const TBulkUpsertSettings& settings) { - return Impl_->BulkUpsertUnretryableArenaAllocated(table, std::move(rows), arena, settings); + return Impl_->BulkUpsertUnretryableArenaAllocatedUnsafe(table, std::move(rows), arena, settings); } TAsyncBulkUpsertResult TTableClient::BulkUpsert(const std::string& table, TValue&& rows, From f22befaf9c2dfaafc3c30361145c413440435135 Mon Sep 17 00:00:00 2001 From: Vladislav Artiukhov Date: Fri, 13 Jun 2025 18:48:10 +0000 Subject: [PATCH 29/41] refactor: move logic of RunDeferred and RUnDeferredOnArena into impl method --- .../grpc_connections/grpc_connections.h | 161 +++++++++--------- 1 file changed, 77 insertions(+), 84 deletions(-) diff --git a/ydb/public/sdk/cpp/src/client/impl/ydb_internal/grpc_connections/grpc_connections.h b/ydb/public/sdk/cpp/src/client/impl/ydb_internal/grpc_connections/grpc_connections.h index 3e4c4c2c8c38..2a1f3346370f 100644 --- a/ydb/public/sdk/cpp/src/client/impl/ydb_internal/grpc_connections/grpc_connections.h +++ b/ydb/public/sdk/cpp/src/client/impl/ydb_internal/grpc_connections/grpc_connections.h @@ -276,10 +276,6 @@ class TGRpcConnectionsImpl }, dbState, requestSettings.PreferredEndpoint, requestSettings.EndpointPolicy); } - - - - template void RunOnArena( TRequest* request, @@ -416,10 +412,6 @@ class TGRpcConnectionsImpl }, dbState, requestSettings.PreferredEndpoint, requestSettings.EndpointPolicy); } - - - - template void RunDeferred( TRequest&& request, @@ -429,48 +421,17 @@ class TGRpcConnectionsImpl TDuration deferredTimeout, const TRpcRequestSettings& requestSettings, bool poll = false, - std::shared_ptr context = nullptr) - { - if (!TryCreateContext(context)) { - TPlainStatus status(EStatus::CLIENT_CANCELLED, "Client is stopped"); - userResponseCb(nullptr, status); - return; - } - - auto responseCb = [this, userResponseCb = std::move(userResponseCb), dbState, deferredTimeout, poll, context] - (TResponse* response, TPlainStatus status) mutable - { - if (response) { - Ydb::Operations::Operation* operation = response->mutable_operation(); - if (!operation->ready() && poll) { - auto action = MakeIntrusive( - operation->id(), - std::move(userResponseCb), - this, - std::move(context), - deferredTimeout, - dbState, - status.Endpoint); - - action->Start(); - } else { - NYdb::NIssue::TIssues opIssues; - NYdb::NIssue::IssuesFromMessage(operation->issues(), opIssues); - userResponseCb(operation, TPlainStatus{static_cast(operation->status()), std::move(opIssues), - status.Endpoint, std::move(status.Metadata)}); - } - } else { - userResponseCb(nullptr, status); - } - }; - - Run( + std::shared_ptr context = nullptr) { + RunDeferredImpl( std::forward(request), - responseCb, + std::move(userResponseCb), rpc, dbState, + deferredTimeout, requestSettings, - std::move(context)); + poll, + std::move(context) + ); } template @@ -482,48 +443,17 @@ class TGRpcConnectionsImpl TDuration deferredTimeout, const TRpcRequestSettings& requestSettings, bool poll = false, - std::shared_ptr context = nullptr) - { - if (!TryCreateContext(context)) { - TPlainStatus status(EStatus::CLIENT_CANCELLED, "Client is stopped"); - userResponseCb(nullptr, status); - return; - } - - auto responseCb = [this, userResponseCb = std::move(userResponseCb), dbState, deferredTimeout, poll, context] - (TResponse* response, TPlainStatus status) mutable - { - if (response) { - Ydb::Operations::Operation* operation = response->mutable_operation(); - if (!operation->ready() && poll) { - auto action = MakeIntrusive( - operation->id(), - std::move(userResponseCb), - this, - std::move(context), - deferredTimeout, - dbState, - status.Endpoint); - - action->Start(); - } else { - NYdb::NIssue::TIssues opIssues; - NYdb::NIssue::IssuesFromMessage(operation->issues(), opIssues); - userResponseCb(operation, TPlainStatus{static_cast(operation->status()), std::move(opIssues), - status.Endpoint, std::move(status.Metadata)}); - } - } else { - userResponseCb(nullptr, status); - } - }; - - RunOnArena( + std::shared_ptr context = nullptr) { + RunDeferredImpl( request, - responseCb, + std::move(userResponseCb), rpc, dbState, + deferredTimeout, requestSettings, - std::move(context)); + poll, + std::move(context) + ); } // Run request using discovery endpoint. @@ -836,6 +766,69 @@ template const TLog& GetLog() const override; private: + template + void RunDeferredImpl( + std::conditional_t request, + TDeferredOperationCb&& userResponseCb, + TSimpleRpc rpc, + TDbDriverStatePtr dbState, + TDuration deferredTimeout, + const TRpcRequestSettings& requestSettings, + bool poll = false, + std::shared_ptr context = nullptr) { + if (!TryCreateContext(context)) { + TPlainStatus status(EStatus::CLIENT_CANCELLED, "Client is stopped"); + userResponseCb(nullptr, status); + return; + } + + auto responseCb = [this, userResponseCb = std::move(userResponseCb), dbState, deferredTimeout, poll, context] + (TResponse* response, TPlainStatus status) mutable + { + if (response) { + Ydb::Operations::Operation* operation = response->mutable_operation(); + if (!operation->ready() && poll) { + auto action = MakeIntrusive( + operation->id(), + std::move(userResponseCb), + this, + std::move(context), + deferredTimeout, + dbState, + status.Endpoint); + + action->Start(); + } else { + NYdb::NIssue::TIssues opIssues; + NYdb::NIssue::IssuesFromMessage(operation->issues(), opIssues); + userResponseCb(operation, TPlainStatus{static_cast(operation->status()), std::move(opIssues), + status.Endpoint, std::move(status.Metadata)}); + } + } else { + userResponseCb(nullptr, status); + } + }; + + if constexpr (RequestOnArena) { + RunOnArena( + request, + responseCb, + rpc, + dbState, + requestSettings, + std::move(context)); + } + else { + Run( + std::forward(request), + responseCb, + rpc, + dbState, + requestSettings, + std::move(context)); + } + } + template void WithServiceConnection(TCallback callback, TDbDriverStatePtr dbState, const TEndpointKey& preferredEndpoint, TRpcRequestSettings::TEndpointPolicy endpointPolicy) From d77d63e5ddc2ce51190dd584d1420cd91de45021 Mon Sep 17 00:00:00 2001 From: Vladislav Artiukhov Date: Fri, 13 Jun 2025 19:23:32 +0000 Subject: [PATCH 30/41] refactor: move logic of Run and RunOnArena into impl method (untested!) Requires testing via a real instance of YDBD to ensure that the refactoring is correct. --- .../grpc_connections/grpc_connections.h | 698 +++++++++++------- 1 file changed, 450 insertions(+), 248 deletions(-) diff --git a/ydb/public/sdk/cpp/src/client/impl/ydb_internal/grpc_connections/grpc_connections.h b/ydb/public/sdk/cpp/src/client/impl/ydb_internal/grpc_connections/grpc_connections.h index 2a1f3346370f..123472509e17 100644 --- a/ydb/public/sdk/cpp/src/client/impl/ydb_internal/grpc_connections/grpc_connections.h +++ b/ydb/public/sdk/cpp/src/client/impl/ydb_internal/grpc_connections/grpc_connections.h @@ -140,6 +140,7 @@ class TGRpcConnectionsImpl TRequest, TResponse>::TAsyncRequest; + template void Run( TRequest&& request, @@ -149,133 +150,143 @@ class TGRpcConnectionsImpl const TRpcRequestSettings& requestSettings, std::shared_ptr context = nullptr) { - using NYdbGrpc::TGrpcStatus; - using TConnection = std::unique_ptr>; - Y_ABORT_UNLESS(dbState); - - if (!TryCreateContext(context)) { - TPlainStatus status(EStatus::CLIENT_CANCELLED, "Client is stopped"); - userResponseCb(nullptr, TPlainStatus{status.Status, std::move(status.Issues)}); - return; - } - - if (dbState->StatCollector.IsCollecting()) { - std::weak_ptr weakState = dbState; - const auto startTime = TInstant::Now(); - userResponseCb = std::move([cb = std::move(userResponseCb), weakState, startTime](TResponse* response, TPlainStatus status) { - const auto resultSize = response ? response->ByteSizeLong() : 0; - cb(response, status); - - if (auto state = weakState.lock()) { - state->StatCollector.IncRequestLatency(TInstant::Now() - startTime); - state->StatCollector.IncResultSize(resultSize); - } - }); - } - - WithServiceConnection( - [this, request = std::forward(request), userResponseCb = std::move(userResponseCb), rpc, requestSettings, context = std::move(context), dbState] - (TPlainStatus status, TConnection serviceConnection, TEndpointKey endpoint) mutable -> void { - if (!status.Ok()) { - userResponseCb( - nullptr, - std::move(status)); - return; - } - - TCallMeta meta; - meta.Timeout = requestSettings.ClientTimeout; - #ifndef YDB_GRPC_UNSECURE_AUTH - meta.CallCredentials = dbState->CallCredentials; - #else - if (requestSettings.UseAuth && dbState->CredentialsProvider && dbState->CredentialsProvider->IsValid()) { - try { - meta.Aux.push_back({ YDB_AUTH_TICKET_HEADER, GetAuthInfo(dbState) }); - } catch (const std::exception& e) { - userResponseCb( - nullptr, - TPlainStatus( - EStatus::CLIENT_UNAUTHENTICATED, - TStringBuilder() << "Can't get Authentication info from CredentialsProvider. " << e.what() - ) - ); - return; - } - } - #endif - if (!requestSettings.TraceId.empty()) { - meta.Aux.push_back({YDB_TRACE_ID_HEADER, requestSettings.TraceId}); - } - - if (!requestSettings.RequestType.empty()) { - meta.Aux.push_back({YDB_REQUEST_TYPE_HEADER, requestSettings.RequestType}); - } - - if (!dbState->Database.empty()) { - SetDatabaseHeader(meta, dbState->Database); - } - - static const std::string clientPid = GetClientPIDHeaderValue(); - - meta.Aux.push_back({YDB_SDK_BUILD_INFO_HEADER, CreateSDKBuildInfo()}); - meta.Aux.push_back({YDB_CLIENT_PID, clientPid}); - meta.Aux.insert(meta.Aux.end(), requestSettings.Header.begin(), requestSettings.Header.end()); - - dbState->StatCollector.IncGRpcInFlight(); - dbState->StatCollector.IncGRpcInFlightByHost(endpoint.GetEndpoint()); - - NYdbGrpc::TAdvancedResponseCallback responseCbLow = - [this, context, userResponseCb = std::move(userResponseCb), endpoint, dbState] - (const grpc::ClientContext& ctx, TGrpcStatus&& grpcStatus, TResponse&& response) mutable -> void { - dbState->StatCollector.DecGRpcInFlight(); - dbState->StatCollector.DecGRpcInFlightByHost(endpoint.GetEndpoint()); - - if (NYdbGrpc::IsGRpcStatusGood(grpcStatus)) { - std::multimap metadata; - - for (const auto& [name, value] : ctx.GetServerInitialMetadata()) { - metadata.emplace( - std::string(name.begin(), name.end()), - std::string(value.begin(), value.end())); - } - for (const auto& [name, value] : ctx.GetServerTrailingMetadata()) { - metadata.emplace( - std::string(name.begin(), name.end()), - std::string(value.begin(), value.end())); - } - - auto resp = new TResult( - std::move(response), - std::move(grpcStatus), - std::move(userResponseCb), - this, - std::move(context), - endpoint.GetEndpoint(), - std::move(metadata)); - - EnqueueResponse(resp); - } else { - dbState->StatCollector.IncReqFailDueTransportError(); - dbState->StatCollector.IncTransportErrorsByHost(endpoint.GetEndpoint()); - - auto resp = new TGRpcErrorResponse( - std::move(grpcStatus), - std::move(userResponseCb), - this, - std::move(context), - endpoint.GetEndpoint()); - - dbState->EndpointPool.BanEndpoint(endpoint.GetEndpoint()); - - EnqueueResponse(resp); - } - }; + RunImpl( + std::forward(request), + std::move(userResponseCb), + rpc, + dbState, + requestSettings, + std::move(context) + ); - serviceConnection->DoAdvancedRequest(std::move(request), std::move(responseCbLow), rpc, meta, - context.get()); - }, dbState, requestSettings.PreferredEndpoint, requestSettings.EndpointPolicy); + // using NYdbGrpc::TGrpcStatus; + // using TConnection = std::unique_ptr>; + // Y_ABORT_UNLESS(dbState); + + // if (!TryCreateContext(context)) { + // TPlainStatus status(EStatus::CLIENT_CANCELLED, "Client is stopped"); + // userResponseCb(nullptr, TPlainStatus{status.Status, std::move(status.Issues)}); + // return; + // } + + // if (dbState->StatCollector.IsCollecting()) { + // std::weak_ptr weakState = dbState; + // const auto startTime = TInstant::Now(); + // userResponseCb = std::move([cb = std::move(userResponseCb), weakState, startTime](TResponse* response, TPlainStatus status) { + // const auto resultSize = response ? response->ByteSizeLong() : 0; + // cb(response, status); + + // if (auto state = weakState.lock()) { + // state->StatCollector.IncRequestLatency(TInstant::Now() - startTime); + // state->StatCollector.IncResultSize(resultSize); + // } + // }); + // } + + // WithServiceConnection( + // [this, request = std::forward(request), userResponseCb = std::move(userResponseCb), rpc, requestSettings, context = std::move(context), dbState] + // (TPlainStatus status, TConnection serviceConnection, TEndpointKey endpoint) mutable -> void { + // if (!status.Ok()) { + // userResponseCb( + // nullptr, + // std::move(status)); + // return; + // } + + // TCallMeta meta; + // meta.Timeout = requestSettings.ClientTimeout; + // #ifndef YDB_GRPC_UNSECURE_AUTH + // meta.CallCredentials = dbState->CallCredentials; + // #else + // if (requestSettings.UseAuth && dbState->CredentialsProvider && dbState->CredentialsProvider->IsValid()) { + // try { + // meta.Aux.push_back({ YDB_AUTH_TICKET_HEADER, GetAuthInfo(dbState) }); + // } catch (const std::exception& e) { + // userResponseCb( + // nullptr, + // TPlainStatus( + // EStatus::CLIENT_UNAUTHENTICATED, + // TStringBuilder() << "Can't get Authentication info from CredentialsProvider. " << e.what() + // ) + // ); + // return; + // } + // } + // #endif + // if (!requestSettings.TraceId.empty()) { + // meta.Aux.push_back({YDB_TRACE_ID_HEADER, requestSettings.TraceId}); + // } + + // if (!requestSettings.RequestType.empty()) { + // meta.Aux.push_back({YDB_REQUEST_TYPE_HEADER, requestSettings.RequestType}); + // } + + // if (!dbState->Database.empty()) { + // SetDatabaseHeader(meta, dbState->Database); + // } + + // static const std::string clientPid = GetClientPIDHeaderValue(); + + // meta.Aux.push_back({YDB_SDK_BUILD_INFO_HEADER, CreateSDKBuildInfo()}); + // meta.Aux.push_back({YDB_CLIENT_PID, clientPid}); + // meta.Aux.insert(meta.Aux.end(), requestSettings.Header.begin(), requestSettings.Header.end()); + + // dbState->StatCollector.IncGRpcInFlight(); + // dbState->StatCollector.IncGRpcInFlightByHost(endpoint.GetEndpoint()); + + // NYdbGrpc::TAdvancedResponseCallback responseCbLow = + // [this, context, userResponseCb = std::move(userResponseCb), endpoint, dbState] + // (const grpc::ClientContext& ctx, TGrpcStatus&& grpcStatus, TResponse&& response) mutable -> void { + // dbState->StatCollector.DecGRpcInFlight(); + // dbState->StatCollector.DecGRpcInFlightByHost(endpoint.GetEndpoint()); + + // if (NYdbGrpc::IsGRpcStatusGood(grpcStatus)) { + // std::multimap metadata; + + // for (const auto& [name, value] : ctx.GetServerInitialMetadata()) { + // metadata.emplace( + // std::string(name.begin(), name.end()), + // std::string(value.begin(), value.end())); + // } + // for (const auto& [name, value] : ctx.GetServerTrailingMetadata()) { + // metadata.emplace( + // std::string(name.begin(), name.end()), + // std::string(value.begin(), value.end())); + // } + + // auto resp = new TResult( + // std::move(response), + // std::move(grpcStatus), + // std::move(userResponseCb), + // this, + // std::move(context), + // endpoint.GetEndpoint(), + // std::move(metadata)); + + // EnqueueResponse(resp); + // } else { + // dbState->StatCollector.IncReqFailDueTransportError(); + // dbState->StatCollector.IncTransportErrorsByHost(endpoint.GetEndpoint()); + + // auto resp = new TGRpcErrorResponse( + // std::move(grpcStatus), + // std::move(userResponseCb), + // this, + // std::move(context), + // endpoint.GetEndpoint()); + + // dbState->EndpointPool.BanEndpoint(endpoint.GetEndpoint()); + + // EnqueueResponse(resp); + // } + // }; + + // serviceConnection->DoAdvancedRequest(std::move(request), std::move(responseCbLow), rpc, meta, + // context.get()); + // }, dbState, requestSettings.PreferredEndpoint, requestSettings.EndpointPolicy); } + template void RunOnArena( TRequest* request, @@ -285,131 +296,142 @@ class TGRpcConnectionsImpl const TRpcRequestSettings& requestSettings, std::shared_ptr context = nullptr) { - using NYdbGrpc::TGrpcStatus; - using TConnection = std::unique_ptr>; - Y_ABORT_UNLESS(dbState); - - if (!TryCreateContext(context)) { - TPlainStatus status(EStatus::CLIENT_CANCELLED, "Client is stopped"); - userResponseCb(nullptr, TPlainStatus{status.Status, std::move(status.Issues)}); - return; - } - - if (dbState->StatCollector.IsCollecting()) { - std::weak_ptr weakState = dbState; - const auto startTime = TInstant::Now(); - userResponseCb = std::move([cb = std::move(userResponseCb), weakState, startTime](TResponse* response, TPlainStatus status) { - const auto resultSize = response ? response->ByteSizeLong() : 0; - cb(response, status); - - if (auto state = weakState.lock()) { - state->StatCollector.IncRequestLatency(TInstant::Now() - startTime); - state->StatCollector.IncResultSize(resultSize); - } - }); - } - - WithServiceConnection( - [this, request, userResponseCb = std::move(userResponseCb), rpc, requestSettings, context = std::move(context), dbState] - (TPlainStatus status, TConnection serviceConnection, TEndpointKey endpoint) mutable -> void { - if (!status.Ok()) { - userResponseCb( - nullptr, - std::move(status)); - return; - } - - TCallMeta meta; - meta.Timeout = requestSettings.ClientTimeout; - #ifndef YDB_GRPC_UNSECURE_AUTH - meta.CallCredentials = dbState->CallCredentials; - #else - if (requestSettings.UseAuth && dbState->CredentialsProvider && dbState->CredentialsProvider->IsValid()) { - try { - meta.Aux.push_back({ YDB_AUTH_TICKET_HEADER, GetAuthInfo(dbState) }); - } catch (const std::exception& e) { - userResponseCb( - nullptr, - TPlainStatus( - EStatus::CLIENT_UNAUTHENTICATED, - TStringBuilder() << "Can't get Authentication info from CredentialsProvider. " << e.what() - ) - ); - return; - } - } - #endif - if (!requestSettings.TraceId.empty()) { - meta.Aux.push_back({YDB_TRACE_ID_HEADER, requestSettings.TraceId}); - } - - if (!requestSettings.RequestType.empty()) { - meta.Aux.push_back({YDB_REQUEST_TYPE_HEADER, requestSettings.RequestType}); - } - - if (!dbState->Database.empty()) { - SetDatabaseHeader(meta, dbState->Database); - } - - static const std::string clientPid = GetClientPIDHeaderValue(); - - meta.Aux.push_back({YDB_SDK_BUILD_INFO_HEADER, CreateSDKBuildInfo()}); - meta.Aux.push_back({YDB_CLIENT_PID, clientPid}); - meta.Aux.insert(meta.Aux.end(), requestSettings.Header.begin(), requestSettings.Header.end()); - - dbState->StatCollector.IncGRpcInFlight(); - dbState->StatCollector.IncGRpcInFlightByHost(endpoint.GetEndpoint()); - - NYdbGrpc::TAdvancedResponseCallback responseCbLow = - [this, context, userResponseCb = std::move(userResponseCb), endpoint, dbState] - (const grpc::ClientContext& ctx, TGrpcStatus&& grpcStatus, TResponse&& response) mutable -> void { - dbState->StatCollector.DecGRpcInFlight(); - dbState->StatCollector.DecGRpcInFlightByHost(endpoint.GetEndpoint()); - - if (NYdbGrpc::IsGRpcStatusGood(grpcStatus)) { - std::multimap metadata; - - for (const auto& [name, value] : ctx.GetServerInitialMetadata()) { - metadata.emplace( - std::string(name.begin(), name.end()), - std::string(value.begin(), value.end())); - } - for (const auto& [name, value] : ctx.GetServerTrailingMetadata()) { - metadata.emplace( - std::string(name.begin(), name.end()), - std::string(value.begin(), value.end())); - } - - auto resp = new TResult( - std::move(response), - std::move(grpcStatus), - std::move(userResponseCb), - this, - std::move(context), - endpoint.GetEndpoint(), - std::move(metadata)); - - EnqueueResponse(resp); - } else { - dbState->StatCollector.IncReqFailDueTransportError(); - dbState->StatCollector.IncTransportErrorsByHost(endpoint.GetEndpoint()); - - auto resp = new TGRpcErrorResponse( - std::move(grpcStatus), - std::move(userResponseCb), - this, - std::move(context), - endpoint.GetEndpoint()); - - dbState->EndpointPool.BanEndpoint(endpoint.GetEndpoint()); - - EnqueueResponse(resp); - } - }; + RunImpl( + request, + std::move(userResponseCb), + rpc, + dbState, + requestSettings, + std::move(context) + ); - serviceConnection->DoAdvancedRequest(*request, std::move(responseCbLow), rpc, meta, - context.get()); - }, dbState, requestSettings.PreferredEndpoint, requestSettings.EndpointPolicy); + // using NYdbGrpc::TGrpcStatus; + // using TConnection = std::unique_ptr>; + // Y_ABORT_UNLESS(dbState); + + // if (!TryCreateContext(context)) { + // TPlainStatus status(EStatus::CLIENT_CANCELLED, "Client is stopped"); + // userResponseCb(nullptr, TPlainStatus{status.Status, std::move(status.Issues)}); + // return; + // } + + // if (dbState->StatCollector.IsCollecting()) { + // std::weak_ptr weakState = dbState; + // const auto startTime = TInstant::Now(); + // userResponseCb = std::move([cb = std::move(userResponseCb), weakState, startTime](TResponse* response, TPlainStatus status) { + // const auto resultSize = response ? response->ByteSizeLong() : 0; + // cb(response, status); + + // if (auto state = weakState.lock()) { + // state->StatCollector.IncRequestLatency(TInstant::Now() - startTime); + // state->StatCollector.IncResultSize(resultSize); + // } + // }); + // } + + + + // WithServiceConnection( + // [this, request, userResponseCb = std::move(userResponseCb), rpc, requestSettings, context = std::move(context), dbState] + // (TPlainStatus status, TConnection serviceConnection, TEndpointKey endpoint) mutable -> void { + // if (!status.Ok()) { + // userResponseCb( + // nullptr, + // std::move(status)); + // return; + // } + + // TCallMeta meta; + // meta.Timeout = requestSettings.ClientTimeout; + // #ifndef YDB_GRPC_UNSECURE_AUTH + // meta.CallCredentials = dbState->CallCredentials; + // #else + // if (requestSettings.UseAuth && dbState->CredentialsProvider && dbState->CredentialsProvider->IsValid()) { + // try { + // meta.Aux.push_back({ YDB_AUTH_TICKET_HEADER, GetAuthInfo(dbState) }); + // } catch (const std::exception& e) { + // userResponseCb( + // nullptr, + // TPlainStatus( + // EStatus::CLIENT_UNAUTHENTICATED, + // TStringBuilder() << "Can't get Authentication info from CredentialsProvider. " << e.what() + // ) + // ); + // return; + // } + // } + // #endif + // if (!requestSettings.TraceId.empty()) { + // meta.Aux.push_back({YDB_TRACE_ID_HEADER, requestSettings.TraceId}); + // } + + // if (!requestSettings.RequestType.empty()) { + // meta.Aux.push_back({YDB_REQUEST_TYPE_HEADER, requestSettings.RequestType}); + // } + + // if (!dbState->Database.empty()) { + // SetDatabaseHeader(meta, dbState->Database); + // } + + // static const std::string clientPid = GetClientPIDHeaderValue(); + + // meta.Aux.push_back({YDB_SDK_BUILD_INFO_HEADER, CreateSDKBuildInfo()}); + // meta.Aux.push_back({YDB_CLIENT_PID, clientPid}); + // meta.Aux.insert(meta.Aux.end(), requestSettings.Header.begin(), requestSettings.Header.end()); + + // dbState->StatCollector.IncGRpcInFlight(); + // dbState->StatCollector.IncGRpcInFlightByHost(endpoint.GetEndpoint()); + + // NYdbGrpc::TAdvancedResponseCallback responseCbLow = + // [this, context, userResponseCb = std::move(userResponseCb), endpoint, dbState] + // (const grpc::ClientContext& ctx, TGrpcStatus&& grpcStatus, TResponse&& response) mutable -> void { + // dbState->StatCollector.DecGRpcInFlight(); + // dbState->StatCollector.DecGRpcInFlightByHost(endpoint.GetEndpoint()); + + // if (NYdbGrpc::IsGRpcStatusGood(grpcStatus)) { + // std::multimap metadata; + + // for (const auto& [name, value] : ctx.GetServerInitialMetadata()) { + // metadata.emplace( + // std::string(name.begin(), name.end()), + // std::string(value.begin(), value.end())); + // } + // for (const auto& [name, value] : ctx.GetServerTrailingMetadata()) { + // metadata.emplace( + // std::string(name.begin(), name.end()), + // std::string(value.begin(), value.end())); + // } + + // auto resp = new TResult( + // std::move(response), + // std::move(grpcStatus), + // std::move(userResponseCb), + // this, + // std::move(context), + // endpoint.GetEndpoint(), + // std::move(metadata)); + + // EnqueueResponse(resp); + // } else { + // dbState->StatCollector.IncReqFailDueTransportError(); + // dbState->StatCollector.IncTransportErrorsByHost(endpoint.GetEndpoint()); + + // auto resp = new TGRpcErrorResponse( + // std::move(grpcStatus), + // std::move(userResponseCb), + // this, + // std::move(context), + // endpoint.GetEndpoint()); + + // dbState->EndpointPool.BanEndpoint(endpoint.GetEndpoint()); + + // EnqueueResponse(resp); + // } + // }; + + // serviceConnection->DoAdvancedRequest(*request, std::move(responseCbLow), rpc, meta, + // context.get()); + // }, dbState, requestSettings.PreferredEndpoint, requestSettings.EndpointPolicy); } template @@ -766,6 +788,186 @@ template const TLog& GetLog() const override; private: + template + void RunImpl( + std::conditional_t request, + TResponseCb&& userResponseCb, + TSimpleRpc rpc, + TDbDriverStatePtr dbState, + const TRpcRequestSettings& requestSettings, + std::shared_ptr context = nullptr) + { + using NYdbGrpc::TGrpcStatus; + using TConnection = std::unique_ptr>; + Y_ABORT_UNLESS(dbState); + + if (!TryCreateContext(context)) { + TPlainStatus status(EStatus::CLIENT_CANCELLED, "Client is stopped"); + userResponseCb(nullptr, TPlainStatus{status.Status, std::move(status.Issues)}); + return; + } + + if (dbState->StatCollector.IsCollecting()) { + std::weak_ptr weakState = dbState; + const auto startTime = TInstant::Now(); + userResponseCb = std::move([cb = std::move(userResponseCb), weakState, startTime](TResponse* response, TPlainStatus status) { + const auto resultSize = response ? response->ByteSizeLong() : 0; + cb(response, status); + + if (auto state = weakState.lock()) { + state->StatCollector.IncRequestLatency(TInstant::Now() - startTime); + state->StatCollector.IncResultSize(resultSize); + } + }); + } + + auto createMeta = [requestSettings, dbState](const TResponseCb& userResponseCb) -> std::optional { + TCallMeta meta; + meta.Timeout = requestSettings.ClientTimeout; + #ifndef YDB_GRPC_UNSECURE_AUTH + meta.CallCredentials = dbState->CallCredentials; + #else + if (requestSettings.UseAuth && dbState->CredentialsProvider && dbState->CredentialsProvider->IsValid()) { + try { + meta.Aux.push_back({ YDB_AUTH_TICKET_HEADER, GetAuthInfo(dbState) }); + } catch (const std::exception& e) { + userResponseCb( + nullptr, + TPlainStatus( + EStatus::CLIENT_UNAUTHENTICATED, + TStringBuilder() << "Can't get Authentication info from CredentialsProvider. " << e.what() + ) + ); + return std::nullopt; + } + } + #endif + if (!requestSettings.TraceId.empty()) { + meta.Aux.push_back({YDB_TRACE_ID_HEADER, requestSettings.TraceId}); + } + + if (!requestSettings.RequestType.empty()) { + meta.Aux.push_back({YDB_REQUEST_TYPE_HEADER, requestSettings.RequestType}); + } + + if (!dbState->Database.empty()) { + SetDatabaseHeader(meta, dbState->Database); + } + + static const std::string clientPid = GetClientPIDHeaderValue(); + + meta.Aux.push_back({YDB_SDK_BUILD_INFO_HEADER, CreateSDKBuildInfo()}); + meta.Aux.push_back({YDB_CLIENT_PID, clientPid}); + meta.Aux.insert(meta.Aux.end(), requestSettings.Header.begin(), requestSettings.Header.end()); + + return meta; + }; + + auto createResponseCbLow = [this, dbState]( + const TResponseCb& userResponseCb, + const TEndpointKey& endpoint, + std::shared_ptr context) -> NYdbGrpc::TAdvancedResponseCallback { + dbState->StatCollector.IncGRpcInFlight(); + dbState->StatCollector.IncGRpcInFlightByHost(endpoint.GetEndpoint()); + + NYdbGrpc::TAdvancedResponseCallback responseCbLow = + [this, context, userResponseCb = std::move(userResponseCb), endpoint, dbState] + (const grpc::ClientContext& ctx, TGrpcStatus&& grpcStatus, TResponse&& response) mutable -> void { + dbState->StatCollector.DecGRpcInFlight(); + dbState->StatCollector.DecGRpcInFlightByHost(endpoint.GetEndpoint()); + + if (NYdbGrpc::IsGRpcStatusGood(grpcStatus)) { + std::multimap metadata; + + for (const auto& [name, value] : ctx.GetServerInitialMetadata()) { + metadata.emplace( + std::string(name.begin(), name.end()), + std::string(value.begin(), value.end())); + } + for (const auto& [name, value] : ctx.GetServerTrailingMetadata()) { + metadata.emplace( + std::string(name.begin(), name.end()), + std::string(value.begin(), value.end())); + } + + auto resp = new TResult( + std::move(response), + std::move(grpcStatus), + std::move(userResponseCb), + this, + std::move(context), + endpoint.GetEndpoint(), + std::move(metadata)); + + EnqueueResponse(resp); + } else { + dbState->StatCollector.IncReqFailDueTransportError(); + dbState->StatCollector.IncTransportErrorsByHost(endpoint.GetEndpoint()); + + auto resp = new TGRpcErrorResponse( + std::move(grpcStatus), + std::move(userResponseCb), + this, + std::move(context), + endpoint.GetEndpoint()); + + dbState->EndpointPool.BanEndpoint(endpoint.GetEndpoint()); + + EnqueueResponse(resp); + } + }; + + return responseCbLow; + }; + + if constexpr (RequestOnArena) { + WithServiceConnection( + [request, userResponseCb = std::move(userResponseCb), rpc, requestSettings, context = std::move(context), dbState, + createMeta = std::move(createMeta), createResponseCbLow = std::move(createResponseCbLow)] + (TPlainStatus status, TConnection serviceConnection, TEndpointKey endpoint) mutable -> void { + if (!status.Ok()) { + userResponseCb( + nullptr, + std::move(status)); + return; + } + + std::optional metaOpt = createMeta(userResponseCb); + if (!metaOpt) { + return; + } + auto meta = std::move(*metaOpt); + auto responseCbLow = createResponseCbLow(userResponseCb, endpoint, context); + + serviceConnection->DoAdvancedRequest(*request, std::move(responseCbLow), rpc, meta, context.get()); + }, dbState, requestSettings.PreferredEndpoint, requestSettings.EndpointPolicy + ); + } + else { + WithServiceConnection( + [request, userResponseCb = std::move(userResponseCb), rpc, requestSettings, context = std::move(context), dbState, + createMeta = std::move(createMeta), createResponseCbLow = std::move(createResponseCbLow)] + (TPlainStatus status, TConnection serviceConnection, TEndpointKey endpoint) mutable -> void { + if (!status.Ok()) { + userResponseCb( + nullptr, + std::move(status)); + return; + } + + std::optional metaOpt = createMeta(userResponseCb); + if (!metaOpt) { + return; + } + auto meta = std::move(*metaOpt); + auto responseCbLow = createResponseCbLow(userResponseCb, endpoint, context); + + serviceConnection->DoAdvancedRequest(std::move(request), std::move(responseCbLow), rpc, meta, context.get()); + }, dbState, requestSettings.PreferredEndpoint, requestSettings.EndpointPolicy + ); + } + } + template void RunDeferredImpl( std::conditional_t request, From 9e2dcb8f8a3ff3a69edf3b7d6e3b70fc0aba42ae Mon Sep 17 00:00:00 2001 From: Vladislav Artiukhov Date: Sat, 14 Jun 2025 13:53:13 +0000 Subject: [PATCH 31/41] fix: return arena-allocated proto model when present in TValue::TImpl --- ydb/public/sdk/cpp/src/client/value/value.cpp | 21 +++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) diff --git a/ydb/public/sdk/cpp/src/client/value/value.cpp b/ydb/public/sdk/cpp/src/client/value/value.cpp index 94e16990b310..34b489cd7ec6 100644 --- a/ydb/public/sdk/cpp/src/client/value/value.cpp +++ b/ydb/public/sdk/cpp/src/client/value/value.cpp @@ -1069,7 +1069,20 @@ class TValue::TImpl { , ProtoValue_{} , ArenaAllocatedValueProto_(arenaAllocatedValueProto) {} + const Ydb::Value& GetProto() const { + return ArenaAllocatedValueProto_ ? *ArenaAllocatedValueProto_ : ProtoValue_; + } + + Ydb::Value& GetProto() { + return ArenaAllocatedValueProto_ ? *ArenaAllocatedValueProto_ : ProtoValue_; + } + + Ydb::Value&& ExtractProto() && { + return std::move(ArenaAllocatedValueProto_ ? *ArenaAllocatedValueProto_ : ProtoValue_); + } + TType Type_; +private: Ydb::Value ProtoValue_; Ydb::Value* ArenaAllocatedValueProto_; }; @@ -1094,15 +1107,15 @@ TType & TValue::GetType() { } const Ydb::Value& TValue::GetProto() const { - return Impl_->ProtoValue_; + return Impl_->GetProto(); } Ydb::Value& TValue::GetProto() { - return Impl_->ProtoValue_; + return Impl_->GetProto(); } Ydb::Value&& TValue::ExtractProto() && { - return std::move(Impl_->ProtoValue_); + return std::move(std::move(*Impl_).ExtractProto()); } //////////////////////////////////////////////////////////////////////////////// @@ -1129,7 +1142,7 @@ class TValueParser::TImpl { : Value_(value.Impl_) , TypeParser_(value.GetType()) { - Reset(Value_->ProtoValue_); + Reset(Value_->GetProto()); } TImpl(const TType& type) From 4d6ce4e08313dd61acd832d85b7000cb099ae720 Mon Sep 17 00:00:00 2001 From: Vladislav Artiukhov Date: Sat, 14 Jun 2025 14:01:46 +0000 Subject: [PATCH 32/41] feat: set ZSTD compression in Apache Arrow codec --- ydb/public/lib/ydb_cli/import/import.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/ydb/public/lib/ydb_cli/import/import.cpp b/ydb/public/lib/ydb_cli/import/import.cpp index 7f39e3682e3a..efe5bacd11a8 100644 --- a/ydb/public/lib/ydb_cli/import/import.cpp +++ b/ydb/public/lib/ydb_cli/import/import.cpp @@ -1137,8 +1137,7 @@ TStatus TImportFileClient::TImpl::UpsertCsv(IInputStream& input, }()); auto writeOptions = arrow::ipc::IpcWriteOptions::Defaults(); - // TODO: what codec to use? ZSTD is used in ydb_workload_import.cpp; use it here as well? - constexpr auto codecType = arrow::Compression::type::UNCOMPRESSED; + constexpr auto codecType = arrow::Compression::type::ZSTD; writeOptions.codec = *arrow::util::Codec::Create(codecType); // determines if we should fallback to YDB Types (TValue with Protobuf arena) when Apache Arrow failed. From 2ee05284051955f89b26494535e28c418878f37e Mon Sep 17 00:00:00 2001 From: Vladislav Artiukhov Date: Sun, 15 Jun 2025 13:53:45 +0000 Subject: [PATCH 33/41] refactor: remove addressed TODO --- ydb/public/sdk/cpp/src/client/table/impl/table_client.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/ydb/public/sdk/cpp/src/client/table/impl/table_client.cpp b/ydb/public/sdk/cpp/src/client/table/impl/table_client.cpp index 49877979c418..4f14e16feffc 100644 --- a/ydb/public/sdk/cpp/src/client/table/impl/table_client.cpp +++ b/ydb/public/sdk/cpp/src/client/table/impl/table_client.cpp @@ -1041,7 +1041,6 @@ TAsyncBulkUpsertResult TTableClient::TImpl::BulkUpsertUnretryableArenaAllocatedU promise.SetValue(std::move(val)); }; - // TODO: make sure it is correct; see what server receives // don't give ownership of request to the function Connections_->RunDeferredOnArena( request, From 4893454d60a4c65f84db9ca2bc1847811cde4254 Mon Sep 17 00:00:00 2001 From: Vladislav Artiukhov Date: Sun, 15 Jun 2025 13:57:22 +0000 Subject: [PATCH 34/41] refactor: remove printings from import.cpp --- ydb/public/lib/ydb_cli/import/import.cpp | 19 ------------------- 1 file changed, 19 deletions(-) diff --git a/ydb/public/lib/ydb_cli/import/import.cpp b/ydb/public/lib/ydb_cli/import/import.cpp index efe5bacd11a8..95d508109c9c 100644 --- a/ydb/public/lib/ydb_cli/import/import.cpp +++ b/ydb/public/lib/ydb_cli/import/import.cpp @@ -852,15 +852,6 @@ TStatus TImportFileClient::TImpl::Import(const TVector& filePaths, cons std::cerr << "Elapsed: " << std::setprecision(3) << duration.SecondsFloat() << " sec. Total read size: " << PrettifyBytes(TotalBytesRead) << ". Average processing speed: " << PrettifyBytes((double)TotalBytesRead / duration.SecondsFloat()) << "/s." << std::endl; - - // print strcture for the pipeline: - // elapsed_time_sec: [float] - // total_read_size_byte: [integer] - // avg_processing_speed_bytes_per_sec: [integer] - std::cout << "elapsed_time_sec: " << std::setprecision(5) << duration.SecondsFloat() << std::endl; - std::cout << "total_read_size_byte: " << TotalBytesRead << std::endl; - std::cout << "avg_processing_speed_MiB_per_sec: " << std::setprecision(5) - << ((double)TotalBytesRead / duration.SecondsFloat()) / (1024 * 1024) << std::endl; } // Removing all progress files that were a part of this import @@ -1079,11 +1070,6 @@ TStatus TImportFileClient::TImpl::UpsertCsv(IInputStream& input, ui64 skippedBytes = 0; ui64 nextSkipBorder = VerboseModeStepSize; - std::cerr << "Settings.Header_: " << Settings.Header_ << std::endl; - std::cerr << "Settings.SkipRows_: " << Settings.SkipRows_ << std::endl; - std::cerr << "rowsToSkip: " << rowsToSkip << std::endl; - std::cerr << "row: " << row << std::endl; - TString line; std::vector inFlightRequests; std::vector buffer; @@ -1120,11 +1106,6 @@ TStatus TImportFileClient::TImpl::UpsertCsv(IInputStream& input, // Note: table = dbPath (path to the table on the server) auto columns = DbTableInfo->GetTableColumns(); - std::cerr << "columns: " << columns.size() << std::endl; - for (const auto& column : columns) { - std::cerr << "column: " << column.Name << " " << column.Type.ToString() << std::endl; - } - const Ydb::Formats::CsvSettings csvSettings = ([this]() { Ydb::Formats::CsvSettings settings; settings.set_delimiter(Settings.Delimiter_); From 989ac58f3855968bb15b38aa6e5bed06aa58433a Mon Sep 17 00:00:00 2001 From: Vladislav Artiukhov Date: Sun, 15 Jun 2025 14:37:10 +0000 Subject: [PATCH 35/41] refactor: remove commented code and redundant diffs --- ydb/public/lib/ydb_cli/common/csv_parser.cpp | 3 - ydb/public/lib/ydb_cli/common/csv_parser.h | 1 - ydb/public/lib/ydb_cli/import/import.cpp | 17 +- .../include/ydb-cpp-sdk/client/value/value.h | 2 - .../grpc_connections/grpc_connections.h | 259 ------------------ .../impl/ydb_internal/make_request/make.h | 2 +- .../src/client/table/impl/table_client.cpp | 4 - 7 files changed, 3 insertions(+), 285 deletions(-) diff --git a/ydb/public/lib/ydb_cli/common/csv_parser.cpp b/ydb/public/lib/ydb_cli/common/csv_parser.cpp index 82c7df48c397..1b5c3930f9c3 100644 --- a/ydb/public/lib/ydb_cli/common/csv_parser.cpp +++ b/ydb/public/lib/ydb_cli/common/csv_parser.cpp @@ -395,7 +395,6 @@ class TCsvToYdbConverter { TValueBuilder Builder; }; - TCsvParseException FormatError(const std::exception& inputError, const TCsvParser::TParseMetadata& meta, const std::optional& columnName = {}) { @@ -551,8 +550,6 @@ TValue TCsvParser::BuildList(const std::vector& lines, const TString& f return TValue(ResultListType.value(), std::move(valueProto)); } - - TValue TCsvParser::BuildListOnArena( const std::vector& lines, const TString& filename, diff --git a/ydb/public/lib/ydb_cli/common/csv_parser.h b/ydb/public/lib/ydb_cli/common/csv_parser.h index 51954c91358b..7cad2fcb5d09 100644 --- a/ydb/public/lib/ydb_cli/common/csv_parser.h +++ b/ydb/public/lib/ydb_cli/common/csv_parser.h @@ -2,7 +2,6 @@ #include #include -#include #include diff --git a/ydb/public/lib/ydb_cli/import/import.cpp b/ydb/public/lib/ydb_cli/import/import.cpp index 95d508109c9c..d3f5ddc41e52 100644 --- a/ydb/public/lib/ydb_cli/import/import.cpp +++ b/ydb/public/lib/ydb_cli/import/import.cpp @@ -706,7 +706,6 @@ TStatus TImportFileClient::TImpl::Import(const TVector& filePaths, cons for (size_t fileOrderNumber = 0; fileOrderNumber < filePathsSize; ++fileOrderNumber) { const auto& filePath = filePaths[fileOrderNumber]; std::shared_ptr progressFile = LoadOrStartImportProgress(filePath); - auto func = [&, fileOrderNumber, progressFile, this] { std::unique_ptr fileInput; std::optional fileSizeHint; @@ -900,7 +899,6 @@ inline TAsyncStatus TImportFileClient::TImpl::UpsertTValueBuffer(const TString& dbPath, std::function&& buildFunc) { // For the first attempt values are built before acquiring request inflight semaphore std::optional prebuiltValue = buildFunc(); - auto retryFunc = [this, &dbPath, buildFunc = std::move(buildFunc), prebuiltValue = std::move(prebuiltValue)] (NYdb::NTable::TTableClient& tableClient) mutable -> TAsyncStatus { auto buildTValueAndSendRequest = [this, &buildFunc, &dbPath, &tableClient, &prebuiltValue]() { @@ -941,9 +939,7 @@ TAsyncStatus TImportFileClient::TImpl::UpsertTValueBuffer(const TString& dbPath, }); } - -inline -TAsyncStatus TImportFileClient::TImpl::UpsertTValueBufferParquet( +inline TAsyncStatus TImportFileClient::TImpl::UpsertTValueBufferParquet( const TString& dbPath, std::shared_ptr batch, const arrow::ipc::IpcWriteOptions& writeOptions @@ -982,10 +978,7 @@ TAsyncStatus TImportFileClient::TImpl::UpsertTValueBufferParquet( }); } - - -inline -TAsyncStatus TImportFileClient::TImpl::UpsertTValueBufferOnArena( +inline TAsyncStatus TImportFileClient::TImpl::UpsertTValueBufferOnArena( const TString& dbPath, std::function&& buildFunc) { auto arena = std::make_shared(); @@ -1037,11 +1030,6 @@ TAsyncStatus TImportFileClient::TImpl::UpsertTValueBufferOnArena( }); } - - - - - TStatus TImportFileClient::TImpl::UpsertCsv(IInputStream& input, const TString& dbPath, const TString& filePath, @@ -1197,7 +1185,6 @@ TStatus TImportFileClient::TImpl::UpsertCsv(IInputStream& input, } }; - // TODO: create arena here and pass it to UpsertTValueBufferOnArena? UpsertTValueBufferOnArena(dbPath, std::move(buildOnArenaFunc)) .Apply([&, batchStatus](const TAsyncStatus& asyncStatus) { jobInflightManager->ReleaseJob(); diff --git a/ydb/public/sdk/cpp/include/ydb-cpp-sdk/client/value/value.h b/ydb/public/sdk/cpp/include/ydb-cpp-sdk/client/value/value.h index bfe5da1ebfe8..32e4f4f99fec 100644 --- a/ydb/public/sdk/cpp/include/ydb-cpp-sdk/client/value/value.h +++ b/ydb/public/sdk/cpp/include/ydb-cpp-sdk/client/value/value.h @@ -287,13 +287,11 @@ class TValue { Ydb::Value& GetProto(); Ydb::Value&& ExtractProto() &&; - private: class TImpl; std::shared_ptr Impl_; }; - class TValueParser : public TMoveOnly { friend class TResultSetParser; public: diff --git a/ydb/public/sdk/cpp/src/client/impl/ydb_internal/grpc_connections/grpc_connections.h b/ydb/public/sdk/cpp/src/client/impl/ydb_internal/grpc_connections/grpc_connections.h index 123472509e17..0a608fe85171 100644 --- a/ydb/public/sdk/cpp/src/client/impl/ydb_internal/grpc_connections/grpc_connections.h +++ b/ydb/public/sdk/cpp/src/client/impl/ydb_internal/grpc_connections/grpc_connections.h @@ -140,7 +140,6 @@ class TGRpcConnectionsImpl TRequest, TResponse>::TAsyncRequest; - template void Run( TRequest&& request, @@ -158,135 +157,8 @@ class TGRpcConnectionsImpl requestSettings, std::move(context) ); - - // using NYdbGrpc::TGrpcStatus; - // using TConnection = std::unique_ptr>; - // Y_ABORT_UNLESS(dbState); - - // if (!TryCreateContext(context)) { - // TPlainStatus status(EStatus::CLIENT_CANCELLED, "Client is stopped"); - // userResponseCb(nullptr, TPlainStatus{status.Status, std::move(status.Issues)}); - // return; - // } - - // if (dbState->StatCollector.IsCollecting()) { - // std::weak_ptr weakState = dbState; - // const auto startTime = TInstant::Now(); - // userResponseCb = std::move([cb = std::move(userResponseCb), weakState, startTime](TResponse* response, TPlainStatus status) { - // const auto resultSize = response ? response->ByteSizeLong() : 0; - // cb(response, status); - - // if (auto state = weakState.lock()) { - // state->StatCollector.IncRequestLatency(TInstant::Now() - startTime); - // state->StatCollector.IncResultSize(resultSize); - // } - // }); - // } - - // WithServiceConnection( - // [this, request = std::forward(request), userResponseCb = std::move(userResponseCb), rpc, requestSettings, context = std::move(context), dbState] - // (TPlainStatus status, TConnection serviceConnection, TEndpointKey endpoint) mutable -> void { - // if (!status.Ok()) { - // userResponseCb( - // nullptr, - // std::move(status)); - // return; - // } - - // TCallMeta meta; - // meta.Timeout = requestSettings.ClientTimeout; - // #ifndef YDB_GRPC_UNSECURE_AUTH - // meta.CallCredentials = dbState->CallCredentials; - // #else - // if (requestSettings.UseAuth && dbState->CredentialsProvider && dbState->CredentialsProvider->IsValid()) { - // try { - // meta.Aux.push_back({ YDB_AUTH_TICKET_HEADER, GetAuthInfo(dbState) }); - // } catch (const std::exception& e) { - // userResponseCb( - // nullptr, - // TPlainStatus( - // EStatus::CLIENT_UNAUTHENTICATED, - // TStringBuilder() << "Can't get Authentication info from CredentialsProvider. " << e.what() - // ) - // ); - // return; - // } - // } - // #endif - // if (!requestSettings.TraceId.empty()) { - // meta.Aux.push_back({YDB_TRACE_ID_HEADER, requestSettings.TraceId}); - // } - - // if (!requestSettings.RequestType.empty()) { - // meta.Aux.push_back({YDB_REQUEST_TYPE_HEADER, requestSettings.RequestType}); - // } - - // if (!dbState->Database.empty()) { - // SetDatabaseHeader(meta, dbState->Database); - // } - - // static const std::string clientPid = GetClientPIDHeaderValue(); - - // meta.Aux.push_back({YDB_SDK_BUILD_INFO_HEADER, CreateSDKBuildInfo()}); - // meta.Aux.push_back({YDB_CLIENT_PID, clientPid}); - // meta.Aux.insert(meta.Aux.end(), requestSettings.Header.begin(), requestSettings.Header.end()); - - // dbState->StatCollector.IncGRpcInFlight(); - // dbState->StatCollector.IncGRpcInFlightByHost(endpoint.GetEndpoint()); - - // NYdbGrpc::TAdvancedResponseCallback responseCbLow = - // [this, context, userResponseCb = std::move(userResponseCb), endpoint, dbState] - // (const grpc::ClientContext& ctx, TGrpcStatus&& grpcStatus, TResponse&& response) mutable -> void { - // dbState->StatCollector.DecGRpcInFlight(); - // dbState->StatCollector.DecGRpcInFlightByHost(endpoint.GetEndpoint()); - - // if (NYdbGrpc::IsGRpcStatusGood(grpcStatus)) { - // std::multimap metadata; - - // for (const auto& [name, value] : ctx.GetServerInitialMetadata()) { - // metadata.emplace( - // std::string(name.begin(), name.end()), - // std::string(value.begin(), value.end())); - // } - // for (const auto& [name, value] : ctx.GetServerTrailingMetadata()) { - // metadata.emplace( - // std::string(name.begin(), name.end()), - // std::string(value.begin(), value.end())); - // } - - // auto resp = new TResult( - // std::move(response), - // std::move(grpcStatus), - // std::move(userResponseCb), - // this, - // std::move(context), - // endpoint.GetEndpoint(), - // std::move(metadata)); - - // EnqueueResponse(resp); - // } else { - // dbState->StatCollector.IncReqFailDueTransportError(); - // dbState->StatCollector.IncTransportErrorsByHost(endpoint.GetEndpoint()); - - // auto resp = new TGRpcErrorResponse( - // std::move(grpcStatus), - // std::move(userResponseCb), - // this, - // std::move(context), - // endpoint.GetEndpoint()); - - // dbState->EndpointPool.BanEndpoint(endpoint.GetEndpoint()); - - // EnqueueResponse(resp); - // } - // }; - - // serviceConnection->DoAdvancedRequest(std::move(request), std::move(responseCbLow), rpc, meta, - // context.get()); - // }, dbState, requestSettings.PreferredEndpoint, requestSettings.EndpointPolicy); } - template void RunOnArena( TRequest* request, @@ -304,134 +176,6 @@ class TGRpcConnectionsImpl requestSettings, std::move(context) ); - - // using NYdbGrpc::TGrpcStatus; - // using TConnection = std::unique_ptr>; - // Y_ABORT_UNLESS(dbState); - - // if (!TryCreateContext(context)) { - // TPlainStatus status(EStatus::CLIENT_CANCELLED, "Client is stopped"); - // userResponseCb(nullptr, TPlainStatus{status.Status, std::move(status.Issues)}); - // return; - // } - - // if (dbState->StatCollector.IsCollecting()) { - // std::weak_ptr weakState = dbState; - // const auto startTime = TInstant::Now(); - // userResponseCb = std::move([cb = std::move(userResponseCb), weakState, startTime](TResponse* response, TPlainStatus status) { - // const auto resultSize = response ? response->ByteSizeLong() : 0; - // cb(response, status); - - // if (auto state = weakState.lock()) { - // state->StatCollector.IncRequestLatency(TInstant::Now() - startTime); - // state->StatCollector.IncResultSize(resultSize); - // } - // }); - // } - - - - // WithServiceConnection( - // [this, request, userResponseCb = std::move(userResponseCb), rpc, requestSettings, context = std::move(context), dbState] - // (TPlainStatus status, TConnection serviceConnection, TEndpointKey endpoint) mutable -> void { - // if (!status.Ok()) { - // userResponseCb( - // nullptr, - // std::move(status)); - // return; - // } - - // TCallMeta meta; - // meta.Timeout = requestSettings.ClientTimeout; - // #ifndef YDB_GRPC_UNSECURE_AUTH - // meta.CallCredentials = dbState->CallCredentials; - // #else - // if (requestSettings.UseAuth && dbState->CredentialsProvider && dbState->CredentialsProvider->IsValid()) { - // try { - // meta.Aux.push_back({ YDB_AUTH_TICKET_HEADER, GetAuthInfo(dbState) }); - // } catch (const std::exception& e) { - // userResponseCb( - // nullptr, - // TPlainStatus( - // EStatus::CLIENT_UNAUTHENTICATED, - // TStringBuilder() << "Can't get Authentication info from CredentialsProvider. " << e.what() - // ) - // ); - // return; - // } - // } - // #endif - // if (!requestSettings.TraceId.empty()) { - // meta.Aux.push_back({YDB_TRACE_ID_HEADER, requestSettings.TraceId}); - // } - - // if (!requestSettings.RequestType.empty()) { - // meta.Aux.push_back({YDB_REQUEST_TYPE_HEADER, requestSettings.RequestType}); - // } - - // if (!dbState->Database.empty()) { - // SetDatabaseHeader(meta, dbState->Database); - // } - - // static const std::string clientPid = GetClientPIDHeaderValue(); - - // meta.Aux.push_back({YDB_SDK_BUILD_INFO_HEADER, CreateSDKBuildInfo()}); - // meta.Aux.push_back({YDB_CLIENT_PID, clientPid}); - // meta.Aux.insert(meta.Aux.end(), requestSettings.Header.begin(), requestSettings.Header.end()); - - // dbState->StatCollector.IncGRpcInFlight(); - // dbState->StatCollector.IncGRpcInFlightByHost(endpoint.GetEndpoint()); - - // NYdbGrpc::TAdvancedResponseCallback responseCbLow = - // [this, context, userResponseCb = std::move(userResponseCb), endpoint, dbState] - // (const grpc::ClientContext& ctx, TGrpcStatus&& grpcStatus, TResponse&& response) mutable -> void { - // dbState->StatCollector.DecGRpcInFlight(); - // dbState->StatCollector.DecGRpcInFlightByHost(endpoint.GetEndpoint()); - - // if (NYdbGrpc::IsGRpcStatusGood(grpcStatus)) { - // std::multimap metadata; - - // for (const auto& [name, value] : ctx.GetServerInitialMetadata()) { - // metadata.emplace( - // std::string(name.begin(), name.end()), - // std::string(value.begin(), value.end())); - // } - // for (const auto& [name, value] : ctx.GetServerTrailingMetadata()) { - // metadata.emplace( - // std::string(name.begin(), name.end()), - // std::string(value.begin(), value.end())); - // } - - // auto resp = new TResult( - // std::move(response), - // std::move(grpcStatus), - // std::move(userResponseCb), - // this, - // std::move(context), - // endpoint.GetEndpoint(), - // std::move(metadata)); - - // EnqueueResponse(resp); - // } else { - // dbState->StatCollector.IncReqFailDueTransportError(); - // dbState->StatCollector.IncTransportErrorsByHost(endpoint.GetEndpoint()); - - // auto resp = new TGRpcErrorResponse( - // std::move(grpcStatus), - // std::move(userResponseCb), - // this, - // std::move(context), - // endpoint.GetEndpoint()); - - // dbState->EndpointPool.BanEndpoint(endpoint.GetEndpoint()); - - // EnqueueResponse(resp); - // } - // }; - - // serviceConnection->DoAdvancedRequest(*request, std::move(responseCbLow), rpc, meta, - // context.get()); - // }, dbState, requestSettings.PreferredEndpoint, requestSettings.EndpointPolicy); } template @@ -564,9 +308,6 @@ template context); } - - - template class TStream> using TStreamRpc = typename TStream< diff --git a/ydb/public/sdk/cpp/src/client/impl/ydb_internal/make_request/make.h b/ydb/public/sdk/cpp/src/client/impl/ydb_internal/make_request/make.h index 392fd35cf793..742e6a8a53e0 100644 --- a/ydb/public/sdk/cpp/src/client/impl/ydb_internal/make_request/make.h +++ b/ydb/public/sdk/cpp/src/client/impl/ydb_internal/make_request/make.h @@ -49,12 +49,12 @@ TProtoRequest MakeOperationRequest(const TRequestSettings& settings) { template TProtoRequest* MakeRequestOnArena(google::protobuf::Arena* arena) { - assert(arena != nullptr); return google::protobuf::Arena::CreateMessage(arena); } template TProtoRequest* MakeOperationRequestOnArena(const TRequestSettings& settings, google::protobuf::Arena* arena) { + Y_ASSERT(arena != nullptr); auto request = MakeRequestOnArena(arena); FillOperationParams(settings, *request); return request; diff --git a/ydb/public/sdk/cpp/src/client/table/impl/table_client.cpp b/ydb/public/sdk/cpp/src/client/table/impl/table_client.cpp index 4f14e16feffc..9b73fc276097 100644 --- a/ydb/public/sdk/cpp/src/client/table/impl/table_client.cpp +++ b/ydb/public/sdk/cpp/src/client/table/impl/table_client.cpp @@ -1020,7 +1020,6 @@ TAsyncBulkUpsertResult TTableClient::TImpl::BulkUpsert(const std::string& table, return promise.GetFuture(); } - TAsyncBulkUpsertResult TTableClient::TImpl::BulkUpsertUnretryableArenaAllocatedUnsafe( const std::string& table, TValue&& rows, @@ -1053,8 +1052,6 @@ TAsyncBulkUpsertResult TTableClient::TImpl::BulkUpsertUnretryableArenaAllocatedU return promise.GetFuture(); } - - TAsyncBulkUpsertResult TTableClient::TImpl::BulkUpsert(const std::string& table, TValue&& rows, const TBulkUpsertSettings& settings) { auto request = MakeOperationRequest(settings); request.set_table(TStringType{table}); @@ -1081,7 +1078,6 @@ TAsyncBulkUpsertResult TTableClient::TImpl::BulkUpsert(const std::string& table, return promise.GetFuture(); } - TAsyncBulkUpsertResult TTableClient::TImpl::BulkUpsert(const std::string& table, EDataFormat format, const std::string& data, const std::string& schema, const TBulkUpsertSettings& settings) { From f1e26b8da49d986318e1a76b3f8a8a4ba1cc8459 Mon Sep 17 00:00:00 2001 From: Nikolay Perfilov Date: Mon, 23 Jun 2025 18:09:36 +0300 Subject: [PATCH 36/41] Add Arena option to BulkUpsert, add hidden --send-format option --- .../ydb_cli/commands/ydb_service_import.cpp | 13 +- .../lib/ydb_cli/commands/ydb_service_import.h | 2 + ydb/public/lib/ydb_cli/common/csv_parser.cpp | 14 +- ydb/public/lib/ydb_cli/import/import.cpp | 165 +++++++++--------- ydb/public/lib/ydb_cli/import/import.h | 8 + ydb/public/lib/ydb_cli/import/ya.make | 2 + .../include/ydb-cpp-sdk/client/table/table.h | 71 +------- .../include/ydb-cpp-sdk/client/value/value.h | 10 +- .../src/client/table/impl/table_client.cpp | 114 ++++-------- .../cpp/src/client/table/impl/table_client.h | 9 - ydb/public/sdk/cpp/src/client/table/table.cpp | 15 -- ydb/public/sdk/cpp/src/client/value/value.cpp | 18 -- 12 files changed, 156 insertions(+), 285 deletions(-) diff --git a/ydb/public/lib/ydb_cli/commands/ydb_service_import.cpp b/ydb/public/lib/ydb_cli/commands/ydb_service_import.cpp index e83f89e11c66..509a7ccc7302 100644 --- a/ydb/public/lib/ydb_cli/commands/ydb_service_import.cpp +++ b/ydb/public/lib/ydb_cli/commands/ydb_service_import.cpp @@ -6,7 +6,6 @@ #include #include #include -#include #include #include @@ -302,6 +301,17 @@ void TCommandImportFromCsv::Config(TConfig& config) { config.Opts->AddLongOption("newline-delimited", "No newline characters inside records, enables some import optimizations (see docs)") .StoreTrue(&NewlineDelimited); + TStringStream description; + description << "Format that data will be serialized to before sending to YDB. Available options: "; + NColorizer::TColors colors = NColorizer::AutoColors(Cout); + description << "\n " << colors.BoldColor() << "tvalue" << colors.OldColor() + << "\n " << "A default YDB protobuf format"; + description << "\n " << colors.BoldColor() << "arrow" << colors.OldColor() + << "\n " << "Apache Arrow format"; + description << "\nDefault: " << colors.CyanColor() << "\"" << "tvalue" << "\"" << colors.OldColor() << "."; + config.Opts->AddLongOption("send-format", description.Str()) + .RequiredArgument("STRING").StoreResult(&SendFormat) + .Hidden(); if (InputFormat == EDataFormat::Csv) { config.Opts->AddLongOption("delimiter", "Field delimiter in rows") .RequiredArgument("STRING").StoreResult(&Delimiter).DefaultValue(Delimiter); @@ -325,6 +335,7 @@ int TCommandImportFromCsv::Run(TConfig& config) { settings.HeaderRow(HeaderRow); settings.NullValue(NullValue); settings.Verbose(config.IsVerbose()); + settings.SendFormat(SendFormat); if (Delimiter.size() != 1) { throw TMisuseException() diff --git a/ydb/public/lib/ydb_cli/commands/ydb_service_import.h b/ydb/public/lib/ydb_cli/commands/ydb_service_import.h index c6eb4f57e6c4..dd1f85fa18b1 100644 --- a/ydb/public/lib/ydb_cli/commands/ydb_service_import.h +++ b/ydb/public/lib/ydb_cli/commands/ydb_service_import.h @@ -7,6 +7,7 @@ #include #include #include +#include namespace NYdb::NConsoleClient { @@ -86,6 +87,7 @@ class TCommandImportFromCsv : public TCommandImportFileBase { ui32 SkipRows = 0; bool Header = false; bool NewlineDelimited = true; + NConsoleClient::ESendFormat SendFormat = NConsoleClient::ESendFormat::Default; }; class TCommandImportFromTsv : public TCommandImportFromCsv { diff --git a/ydb/public/lib/ydb_cli/common/csv_parser.cpp b/ydb/public/lib/ydb_cli/common/csv_parser.cpp index 1b5c3930f9c3..6da0db843cf4 100644 --- a/ydb/public/lib/ydb_cli/common/csv_parser.cpp +++ b/ydb/public/lib/ydb_cli/common/csv_parser.cpp @@ -535,8 +535,8 @@ TValue TCsvParser::BuildList(const std::vector& lines, const TString& f } // Original path with local value object - Ydb::Value valueProto; - auto* listItems = valueProto.mutable_items(); + Ydb::Value listProtoValue; + auto* listItems = listProtoValue.mutable_items(); listItems->Reserve(lines.size()); for (const auto& line : lines) { @@ -547,7 +547,7 @@ TValue TCsvParser::BuildList(const std::vector& lines, const TString& f } // Return a TValue that takes ownership via move - return TValue(ResultListType.value(), std::move(valueProto)); + return TValue(ResultListType.value(), std::move(listProtoValue)); } TValue TCsvParser::BuildListOnArena( @@ -565,8 +565,8 @@ TValue TCsvParser::BuildListOnArena( } // allocate Ydb::Value on arena - Ydb::Value* value = google::protobuf::Arena::CreateMessage(arena); - auto* listItems = value->mutable_items(); + Ydb::Value* listProtoValue = google::protobuf::Arena::CreateMessage(arena); + auto* listItems = listProtoValue->mutable_items(); listItems->Reserve(lines.size()); for (const auto& line : lines) { @@ -577,7 +577,7 @@ TValue TCsvParser::BuildListOnArena( } // Return a TValue that references the arena-allocated message - return TValue(ResultListType.value(), value); + return TValue(ResultListType.value(), listProtoValue); } // Helper method to process a single CSV line @@ -605,7 +605,7 @@ void TCsvParser::ProcessCsvLine( TStringBuf nextField = Consume(splitter, meta, *headerIt); if (!*skipIt) { TValue builtValue = FieldToValue(*typeParserIt->get(), nextField, NullValue, meta, *headerIt); - *structItems->Add() = std::move(builtValue).ExtractProto(); + *structItems->Add() = std::move(builtValue.GetProto()); ++typeParserIt; } diff --git a/ydb/public/lib/ydb_cli/import/import.cpp b/ydb/public/lib/ydb_cli/import/import.cpp index d3f5ddc41e52..5a798618160d 100644 --- a/ydb/public/lib/ydb_cli/import/import.cpp +++ b/ydb/public/lib/ydb_cli/import/import.cpp @@ -906,7 +906,7 @@ TAsyncStatus TImportFileClient::TImpl::UpsertTValueBuffer(const TString& dbPath, // to prevent copying data in retryFunc in a happy way when there is only one request TValue builtValue = prebuiltValue.has_value() ? std::move(prebuiltValue.value()) : buildFunc(); prebuiltValue = std::nullopt; - return tableClient.BulkUpsertUnretryableUnsafe(dbPath, std::move(builtValue), UpsertSettings) + return tableClient.BulkUpsert(dbPath, std::move(builtValue), UpsertSettings) .Apply([](const NYdb::NTable::TAsyncBulkUpsertResult& bulkUpsertResult) { NYdb::TStatus status = bulkUpsertResult.GetValueSync(); return NThreading::MakeFuture(status); @@ -994,8 +994,10 @@ inline TAsyncStatus TImportFileClient::TImpl::UpsertTValueBufferOnArena( TValue builtValue = prebuiltValue.has_value() ? std::move(prebuiltValue.value()) : buildFunc(arena.get()); prebuiltValue = std::nullopt; - return tableClient.BulkUpsertUnretryableArenaAllocatedUnsafe( - dbPath, std::move(builtValue), arena.get(), UpsertSettings) + auto settings = UpsertSettings; + settings.Arena(arena.get()); + return tableClient.BulkUpsert( + dbPath, std::move(builtValue), settings) .Apply([](const NYdb::NTable::TAsyncBulkUpsertResult& bulkUpsertResult) { NYdb::TStatus status = bulkUpsertResult.GetValueSync(); return NThreading::MakeFuture(status); @@ -1109,94 +1111,89 @@ TStatus TImportFileClient::TImpl::UpsertCsv(IInputStream& input, constexpr auto codecType = arrow::Compression::type::ZSTD; writeOptions.codec = *arrow::util::Codec::Create(codecType); - // determines if we should fallback to YDB Types (TValue with Protobuf arena) when Apache Arrow failed. - // Apache Arrow tends to be faster but doesn't support all YDB types (e.g. TzDatetime). On the other hand, - // YDB Types are slower but support all YDB types. - std::atomic_bool fallbackToYDBTypes = false; - - auto upsertCsvFunc = [&](std::vector&& buffer, ui64 row, std::shared_ptr batchStatus) { - // by default, we attempt to send data via Apache Arrow, if it fails, we fallback to YDB Types (TValue with Protobuf arena) - if (!fallbackToYDBTypes.load()) { - const i64 estimatedCsvLineLength = (!buffer.empty() ? 2 * buffer.front().size() : 10'000); - TStringBuilder data; - data.reserve((buffer.size() + (Settings.Header_ ? 1 : 0)) * estimatedCsvLineLength); - // insert header if it is present in the given csv file - if (Settings.Header_) { - data << parser.GetHeaderRow() << Endl; - } - data << JoinSeq("\n", buffer) << Endl; - - // I: attempt to send data via Apache Arrow - // if header is present, it is expected to be the first line of the data - TString error; - auto arrowCsv = NKikimr::NFormats::TArrowCSVTable::Create(columns, Settings.Header_); - if (arrowCsv.ok()) { - if (auto batch = arrowCsv->ReadSingleBatch(data, csvSettings, error)) { - if (!error) { - // batch was read successfully, sending data via Apache Arrow - UpsertTValueBufferParquet(dbPath, std::move(batch), writeOptions) - .Apply([&, batchStatus](const TAsyncStatus& asyncStatus) { - jobInflightManager->ReleaseJob(); - if (asyncStatus.GetValueSync().IsSuccess()) { - batchStatus->Completed = true; - if (!FileProgressPool->AddFunc(saveProgressIfAny) && !Failed.exchange(true)) { - ErrorStatus = MakeHolder(MakeStatus(EStatus::INTERNAL_ERROR, - "Couldn't add worker func to save progress")); - } + switch (Settings.SendFormat_) { + case ESendFormat::Default: + case ESendFormat::TValue: + { + auto buildOnArenaFunc = [&, buffer = std::move(buffer), row, this] (google::protobuf::Arena* arena) mutable { + try { + return parser.BuildListOnArena(buffer, filePath, arena, row); + } catch (const std::exception& e) { + if (!Failed.exchange(true)) { + ErrorStatus = MakeHolder(MakeStatus(EStatus::INTERNAL_ERROR, e.what())); + } + jobInflightManager->ReleaseJob(); + throw; + } + }; + + UpsertTValueBufferOnArena(dbPath, std::move(buildOnArenaFunc)) + .Apply([&, batchStatus](const TAsyncStatus& asyncStatus) { + jobInflightManager->ReleaseJob(); + if (asyncStatus.GetValueSync().IsSuccess()) { + batchStatus->Completed = true; + if (!FileProgressPool->AddFunc(saveProgressIfAny) && !Failed.exchange(true)) { + ErrorStatus = MakeHolder(MakeStatus(EStatus::INTERNAL_ERROR, + "Couldn't add worker func to save progress")); } - return asyncStatus; - }); - } - else { - error = "Error while reading a batch from Apache Arrow: " + error; - fallbackToYDBTypes.store(true); - } - } - else { - error = "Could not read a batch from Apache Arrow"; - fallbackToYDBTypes.store(true); + } + return asyncStatus; + }); } - } - else { - error = arrowCsv.status().ToString(); - fallbackToYDBTypes.store(true); - } - - // print the error if fallback requested - if (fallbackToYDBTypes.load()) { - std::cerr << "Error when trying to import via Apache Arrow: '" << error << "'.\n" - << "Falling back to import via YDB Types (no action needed from your side)." << std::endl; - } - } - - // check if fallback requested (after first Apache Arrow failure, we will always fallback to YDB Types) - if (fallbackToYDBTypes.load()) { - // II: fallback to TValue with Protobuf arena because Apache Arrow failed - auto buildOnArenaFunc = [&, buffer = std::move(buffer), row, this] (google::protobuf::Arena* arena) mutable { - try { - return parser.BuildListOnArena(buffer, filePath, arena, row); - } catch (const std::exception& e) { - if (!Failed.exchange(true)) { - ErrorStatus = MakeHolder(MakeStatus(EStatus::INTERNAL_ERROR, e.what())); + break; + case ESendFormat::ApacheArrow: + { + const i64 estimatedCsvLineLength = (!buffer.empty() ? 2 * buffer.front().size() : 10'000); + TStringBuilder data; + data.reserve((buffer.size() + (Settings.Header_ ? 1 : 0)) * estimatedCsvLineLength); + // insert header if it is present in the given csv file + if (Settings.Header_) { + data << parser.GetHeaderRow() << Endl; + } + data << JoinSeq("\n", buffer) << Endl; + + // if header is present, it is expected to be the first line of the data + TString error; + auto arrowCsv = NKikimr::NFormats::TArrowCSVTable::Create(columns, Settings.Header_); + if (arrowCsv.ok()) { + if (auto batch = arrowCsv->ReadSingleBatch(data, csvSettings, error)) { + if (!error) { + // batch was read successfully, sending data via Apache Arrow + UpsertTValueBufferParquet(dbPath, std::move(batch), writeOptions) + .Apply([&, batchStatus](const TAsyncStatus& asyncStatus) { + jobInflightManager->ReleaseJob(); + if (asyncStatus.GetValueSync().IsSuccess()) { + batchStatus->Completed = true; + if (!FileProgressPool->AddFunc(saveProgressIfAny) && !Failed.exchange(true)) { + ErrorStatus = MakeHolder(MakeStatus(EStatus::INTERNAL_ERROR, + "Couldn't add worker func to save progress")); + } + } + return asyncStatus; + }); + } else { + error = "Error while reading a batch from Apache Arrow: " + error; + } + } else { + error = "Could not read a batch from Apache Arrow"; + } + } else { + error = arrowCsv.status().ToString(); } - jobInflightManager->ReleaseJob(); - throw; - } - }; - UpsertTValueBufferOnArena(dbPath, std::move(buildOnArenaFunc)) - .Apply([&, batchStatus](const TAsyncStatus& asyncStatus) { - jobInflightManager->ReleaseJob(); - if (asyncStatus.GetValueSync().IsSuccess()) { - batchStatus->Completed = true; - if (!FileProgressPool->AddFunc(saveProgressIfAny) && !Failed.exchange(true)) { - ErrorStatus = MakeHolder(MakeStatus(EStatus::INTERNAL_ERROR, - "Couldn't add worker func to save progress")); + if (!error.empty()) { + if (!Failed.exchange(true)) { + ErrorStatus = MakeHolder(MakeStatus(EStatus::INTERNAL_ERROR, error)); } } - return asyncStatus; - }); + } + break; + default: + if (!Failed.exchange(true)) { + ErrorStatus = MakeHolder(MakeStatus(EStatus::INTERNAL_ERROR, + (TStringBuilder() << "Unknown send format: " << Settings.SendFormat_).c_str())); + } } }; diff --git a/ydb/public/lib/ydb_cli/import/import.h b/ydb/public/lib/ydb_cli/import/import.h index 814298dff73e..86b3df069881 100644 --- a/ydb/public/lib/ydb_cli/import/import.h +++ b/ydb/public/lib/ydb_cli/import/import.h @@ -35,6 +35,13 @@ class TImportClient; namespace NConsoleClient { +// EDataFormat to be used in operations related to structured data +enum class ESendFormat { + Default /* "default" */, + TValue /* "tvalue" */, + ApacheArrow /* "arrow" */ +}; + struct TImportFileSettings : public TOperationRequestSettings { using TSelf = TImportFileSettings; @@ -60,6 +67,7 @@ struct TImportFileSettings : public TOperationRequestSettings, NullValue, std::nullopt); FLUENT_SETTING_DEFAULT(bool, Verbose, false); + FLUENT_SETTING_DEFAULT(ESendFormat, SendFormat, ESendFormat::Default); }; class TImportFileClient { diff --git a/ydb/public/lib/ydb_cli/import/ya.make b/ydb/public/lib/ydb_cli/import/ya.make index b6b4ff8c1754..9e28d5f8ac87 100644 --- a/ydb/public/lib/ydb_cli/import/ya.make +++ b/ydb/public/lib/ydb_cli/import/ya.make @@ -14,4 +14,6 @@ PEERDIR( library/cpp/string_utils/csv ) +GENERATE_ENUM_SERIALIZATION(import.h) + END() diff --git a/ydb/public/sdk/cpp/include/ydb-cpp-sdk/client/table/table.h b/ydb/public/sdk/cpp/include/ydb-cpp-sdk/client/table/table.h index 3e1dcf28dc15..0e7aa8e81d6b 100644 --- a/ydb/public/sdk/cpp/include/ydb-cpp-sdk/client/table/table.h +++ b/ydb/public/sdk/cpp/include/ydb-cpp-sdk/client/table/table.h @@ -1162,6 +1162,8 @@ struct TBulkUpsertSettings : public TOperationRequestSettings { @@ -1260,75 +1262,6 @@ class TTableClient { //! Using the client after call this method causes UB NThreading::TFuture Stop(); - /** - * @brief Upsert data into table moving `Ydb::Value` from `rows` into request proto model. - * - * This method is not retryable with the same `rows` object because it moves the underlying data - * from the TValue object into the request proto model. The TValue class uses a shared pointer - * to store its implementation data, so when you make a copy of a TValue object, both copies - * share the same underlying data. When this method moves the data from one TValue instance, - * all other instances sharing the same implementation pointer will be left with moved-from data. - * - * Example of incorrect usage: - * ``` - * TValue originalValue = BuildValue(); - * retry { - * TValue copy = originalValue; // Both originalValue and copy share the same data - * client.BulkUpsertUnretryableUnsafe(std::move(copy)); // Moves data from copy, leaving originalValue in moved-from state - * } - * ``` - * CAUTION: It is unsafe to use the instance of `originalValue` because its underlying data is moved! - * - * To safely retry the operation, you need to create a new TValue with fresh data for each attempt. - */ - TAsyncBulkUpsertResult BulkUpsertUnretryableUnsafe(const std::string& table, TValue&& rows, - const TBulkUpsertSettings& settings); - - /** - * @brief Upsert data into table moving `Ydb::Value` from `rows` into request proto model. The `Ydb::Value` of `TValue` must be arena-allocated. - * - * This method is not retryable with the same `rows` object because it moves the underlying data - * from the `TValue` object into the request proto model. The `TValue` class uses a shared pointer - * to store its implementation data, so when you make a copy of a `TValue` object, both copies - * share the same underlying data. When this method moves the data from one `TValue` instance, - * all other instances sharing the same implementation pointer will be left with moved-from data. - * - * IMPORTANT: The `rows` parameter must contain arena-allocated protobuf data. This method - * should only be called when the `TValue`'s underlying data was allocated using the provided - * protobuf arena. Using this method with non-arena-allocated data will lead to undefined behavior. - * It is the caller's responsibility to ensure that the lifetime of the used arena is longer than the lifetime of the `TValue` object during the operation. - * - * @see: https://protobuf.dev/reference/cpp/arenas - * - * Example of incorrect usage: - * ``` - * TValue originalValue = BuildValue(); // Not arena-allocated - * retry { - * TValue copy = originalValue; // Both originalValue and copy share the same data - * client.BulkUpsertUnretryableArenaAllocated(std::move(copy), arena); // WRONG: data not arena-allocated - * } - * ``` - * - * Example of correct usage: - * ``` - * google::protobuf::Arena arena; - * TValue arenaValue = BuildValueOnArena(&arena); // Value built using arena allocation - * client.BulkUpsertUnretryableArenaAllocated(std::move(arenaValue), &arena); - * ``` - * - * To safely retry the operation, you need to create a new `TValue` with fresh arena-allocated data for each attempt. - * - * @param arena The protobuf arena that was used to allocate the data in `rows`. Must be the same - * arena that was used to create the TValue's underlying data. - * @param settings The settings for the bulk upsert operation. - */ - TAsyncBulkUpsertResult BulkUpsertUnretryableArenaAllocatedUnsafe( - const std::string& table, - TValue&& rows, - google::protobuf::Arena* arena, - const TBulkUpsertSettings& settings - ); - //! Non-transactional fast bulk write. //! Interanlly it uses an implicit session and thus doesn't need a session to be passed. //! "rows" parameter must be a list of structs where each stuct represents one row. diff --git a/ydb/public/sdk/cpp/include/ydb-cpp-sdk/client/value/value.h b/ydb/public/sdk/cpp/include/ydb-cpp-sdk/client/value/value.h index 32e4f4f99fec..9003d13c5dea 100644 --- a/ydb/public/sdk/cpp/include/ydb-cpp-sdk/client/value/value.h +++ b/ydb/public/sdk/cpp/include/ydb-cpp-sdk/client/value/value.h @@ -29,8 +29,6 @@ class TType { const Ydb::Type& GetProto() const; Ydb::Type& GetProto(); - Ydb::Type&& ExtractProto() &&; - private: class TImpl; std::shared_ptr Impl_; @@ -278,6 +276,12 @@ class TValue { public: TValue(const TType& type, const Ydb::Value& valueProto); TValue(const TType& type, Ydb::Value&& valueProto); + /** + * Lifetime of the arena, and hence the `Ydb::Value`, is expected to be managed by the caller. + * The `Ydb::Value` is expected to be arena-allocated. + * + * See: https://protobuf.dev/reference/cpp/arenas + */ TValue(const TType& type, Ydb::Value* arenaAllocatedValueProto); const TType& GetType() const; @@ -285,8 +289,6 @@ class TValue { const Ydb::Value& GetProto() const; Ydb::Value& GetProto(); - - Ydb::Value&& ExtractProto() &&; private: class TImpl; std::shared_ptr Impl_; diff --git a/ydb/public/sdk/cpp/src/client/table/impl/table_client.cpp b/ydb/public/sdk/cpp/src/client/table/impl/table_client.cpp index 9b73fc276097..29dedd132cff 100644 --- a/ydb/public/sdk/cpp/src/client/table/impl/table_client.cpp +++ b/ydb/public/sdk/cpp/src/client/table/impl/table_client.cpp @@ -990,91 +990,49 @@ void TTableClient::TImpl::SetStatCollector(const NSdkStats::TStatCollector::TCli } TAsyncBulkUpsertResult TTableClient::TImpl::BulkUpsert(const std::string& table, TValue&& rows, const TBulkUpsertSettings& settings, bool canMove) { - auto request = MakeOperationRequest(settings); - request.set_table(TStringType{table}); - if (canMove) { - request.mutable_rows()->mutable_type()->Swap(&rows.GetType().GetProto()); - request.mutable_rows()->mutable_value()->Swap(&rows.GetProto()); + Ydb::Table::BulkUpsertRequest* request = nullptr; + std::unique_ptr holder; + + if (settings.Arena_) { + request = MakeOperationRequestOnArena(settings, settings.Arena_); } else { - *request.mutable_rows()->mutable_type() = TProtoAccessor::GetProto(rows.GetType()); - *request.mutable_rows()->mutable_value() = rows.GetProto(); + holder = std::make_unique(MakeOperationRequest(settings)); + request = holder.get(); } - auto promise = NewPromise(); - - auto extractor = [promise] - (google::protobuf::Any* any, TPlainStatus status) mutable { - Y_UNUSED(any); - TBulkUpsertResult val(TStatus(std::move(status))); - promise.SetValue(std::move(val)); - }; - - Connections_->RunDeferred( - std::move(request), - extractor, - &Ydb::Table::V1::TableService::Stub::AsyncBulkUpsert, - DbDriverState_, - INITIAL_DEFERRED_CALL_DELAY, - TRpcRequestSettings::Make(settings)); - - return promise.GetFuture(); -} - -TAsyncBulkUpsertResult TTableClient::TImpl::BulkUpsertUnretryableArenaAllocatedUnsafe( - const std::string& table, - TValue&& rows, - google::protobuf::Arena* arena, - const TBulkUpsertSettings& settings -) { - auto request = MakeOperationRequestOnArena(settings, arena); request->set_table(TStringType{table}); - *request->mutable_rows()->mutable_type() = std::move(rows.GetType()).ExtractProto(); - *request->mutable_rows()->mutable_value() = std::move(rows).ExtractProto(); - - auto promise = NewPromise(); - - auto extractor = [promise] - (google::protobuf::Any* any, TPlainStatus status) mutable { - Y_UNUSED(any); - TBulkUpsertResult val(TStatus(std::move(status))); - promise.SetValue(std::move(val)); - }; - - // don't give ownership of request to the function - Connections_->RunDeferredOnArena( - request, - extractor, - &Ydb::Table::V1::TableService::Stub::AsyncBulkUpsert, - DbDriverState_, - INITIAL_DEFERRED_CALL_DELAY, - TRpcRequestSettings::Make(settings)); - - return promise.GetFuture(); -} - -TAsyncBulkUpsertResult TTableClient::TImpl::BulkUpsert(const std::string& table, TValue&& rows, const TBulkUpsertSettings& settings) { - auto request = MakeOperationRequest(settings); - request.set_table(TStringType{table}); - *request.mutable_rows()->mutable_type() = TProtoAccessor::GetProto(rows.GetType()); - *request.mutable_rows()->mutable_value() = rows.GetProto(); + if (canMove) { + request->mutable_rows()->mutable_type()->Swap(&rows.GetType().GetProto()); + request->mutable_rows()->mutable_value()->Swap(&rows.GetProto()); + } else { + *request->mutable_rows()->mutable_type() = TProtoAccessor::GetProto(rows.GetType()); + *request->mutable_rows()->mutable_value() = rows.GetProto(); + } auto promise = NewPromise(); + auto extractor = [promise](google::protobuf::Any* any, TPlainStatus status) mutable { + Y_UNUSED(any); + TBulkUpsertResult val(TStatus(std::move(status))); + promise.SetValue(std::move(val)); + }; - auto extractor = [promise] - (google::protobuf::Any* any, TPlainStatus status) mutable { - Y_UNUSED(any); - TBulkUpsertResult val(TStatus(std::move(status))); - promise.SetValue(std::move(val)); - }; - - Connections_->RunDeferred( - std::move(request), - extractor, - &Ydb::Table::V1::TableService::Stub::AsyncBulkUpsert, - DbDriverState_, - INITIAL_DEFERRED_CALL_DELAY, - TRpcRequestSettings::Make(settings)); - + if (settings.Arena_) { + Connections_->RunDeferredOnArena( + request, + extractor, + &Ydb::Table::V1::TableService::Stub::AsyncBulkUpsert, + DbDriverState_, + INITIAL_DEFERRED_CALL_DELAY, + TRpcRequestSettings::Make(settings)); + } else { + Connections_->RunDeferred( + std::move(*holder), + extractor, + &Ydb::Table::V1::TableService::Stub::AsyncBulkUpsert, + DbDriverState_, + INITIAL_DEFERRED_CALL_DELAY, + TRpcRequestSettings::Make(settings)); + } return promise.GetFuture(); } diff --git a/ydb/public/sdk/cpp/src/client/table/impl/table_client.h b/ydb/public/sdk/cpp/src/client/table/impl/table_client.h index 8bcfa7640462..d1693310a83b 100644 --- a/ydb/public/sdk/cpp/src/client/table/impl/table_client.h +++ b/ydb/public/sdk/cpp/src/client/table/impl/table_client.h @@ -138,15 +138,6 @@ class TTableClient::TImpl: public TClientImplCommon, public void SetStatCollector(const NSdkStats::TStatCollector::TClientStatCollector& collector); - TAsyncBulkUpsertResult BulkUpsertUnretryableUnsafe(const std::string& table, TValue&& rows, const TBulkUpsertSettings& settings); - - TAsyncBulkUpsertResult BulkUpsertUnretryableArenaAllocatedUnsafe( - const std::string& table, - TValue&& rows, - google::protobuf::Arena* arena, - const TBulkUpsertSettings& settings - ); - TAsyncBulkUpsertResult BulkUpsert(const std::string& table, TValue&& rows, const TBulkUpsertSettings& settings, bool canMove); TAsyncBulkUpsertResult BulkUpsert(const std::string& table, EDataFormat format, const std::string& data, const std::string& schema, const TBulkUpsertSettings& settings); diff --git a/ydb/public/sdk/cpp/src/client/table/table.cpp b/ydb/public/sdk/cpp/src/client/table/table.cpp index bb50789afb76..fd64125bfedd 100644 --- a/ydb/public/sdk/cpp/src/client/table/table.cpp +++ b/ydb/public/sdk/cpp/src/client/table/table.cpp @@ -1457,21 +1457,6 @@ NThreading::TFuture TTableClient::Stop() { return Impl_->Stop(); } -TAsyncBulkUpsertResult TTableClient::BulkUpsertUnretryableUnsafe(const std::string& table, TValue&& rows, - const TBulkUpsertSettings& settings) -{ - return Impl_->BulkUpsertUnretryableUnsafe(table, std::move(rows), settings); -} - -TAsyncBulkUpsertResult TTableClient::BulkUpsertUnretryableArenaAllocatedUnsafe( - const std::string& table, - TValue&& rows, - google::protobuf::Arena* arena, - const TBulkUpsertSettings& settings) -{ - return Impl_->BulkUpsertUnretryableArenaAllocatedUnsafe(table, std::move(rows), arena, settings); -} - TAsyncBulkUpsertResult TTableClient::BulkUpsert(const std::string& table, TValue&& rows, const TBulkUpsertSettings& settings) { diff --git a/ydb/public/sdk/cpp/src/client/value/value.cpp b/ydb/public/sdk/cpp/src/client/value/value.cpp index 34b489cd7ec6..47ae325b35ff 100644 --- a/ydb/public/sdk/cpp/src/client/value/value.cpp +++ b/ydb/public/sdk/cpp/src/client/value/value.cpp @@ -111,10 +111,6 @@ Ydb::Type& TType::GetProto() return Impl_->ProtoType_; } -Ydb::Type&& TType::ExtractProto() && { - return std::move(Impl_->ProtoType_); -} - //////////////////////////////////////////////////////////////////////////////// class TTypeParser::TImpl { @@ -1058,12 +1054,6 @@ class TValue::TImpl { , ProtoValue_(std::move(valueProto)) , ArenaAllocatedValueProto_(nullptr) {} - /** - * Lifetime of the arena, and hence the `Ydb::Value`, is expected to be managed by the caller. - * The `Ydb::Value` is expected to be arena-allocated. - * - * See: https://protobuf.dev/reference/cpp/arenas - */ TImpl(const TType& type, Ydb::Value* arenaAllocatedValueProto) : Type_(type) , ProtoValue_{} @@ -1077,10 +1067,6 @@ class TValue::TImpl { return ArenaAllocatedValueProto_ ? *ArenaAllocatedValueProto_ : ProtoValue_; } - Ydb::Value&& ExtractProto() && { - return std::move(ArenaAllocatedValueProto_ ? *ArenaAllocatedValueProto_ : ProtoValue_); - } - TType Type_; private: Ydb::Value ProtoValue_; @@ -1114,10 +1100,6 @@ Ydb::Value& TValue::GetProto() { return Impl_->GetProto(); } -Ydb::Value&& TValue::ExtractProto() && { - return std::move(std::move(*Impl_).ExtractProto()); -} - //////////////////////////////////////////////////////////////////////////////// class TValueParser::TImpl { From 7d7f71b6eca3e561be70038dcc5a7ea8f3fc58a6 Mon Sep 17 00:00:00 2001 From: Nikolay Perfilov Date: Mon, 23 Jun 2025 19:54:52 +0300 Subject: [PATCH 37/41] Revert changes in grpc_connections, use TRequestWrapper --- .../grpc_connections/grpc_connections.h | 583 +++++++----------- .../src/client/table/impl/table_client.cpp | 2 +- 2 files changed, 234 insertions(+), 351 deletions(-) diff --git a/ydb/public/sdk/cpp/src/client/impl/ydb_internal/grpc_connections/grpc_connections.h b/ydb/public/sdk/cpp/src/client/impl/ydb_internal/grpc_connections/grpc_connections.h index 0a608fe85171..d617a2422d12 100644 --- a/ydb/public/sdk/cpp/src/client/impl/ydb_internal/grpc_connections/grpc_connections.h +++ b/ydb/public/sdk/cpp/src/client/impl/ydb_internal/grpc_connections/grpc_connections.h @@ -140,117 +140,247 @@ class TGRpcConnectionsImpl TRequest, TResponse>::TAsyncRequest; + template + class TRequestWrapper { + public: + // Implicit conversion from rvalue reference + TRequestWrapper(TRequest&& request) + : storage_(std::move(request)) + , isPointer_(false) + {} + + // Implicit conversion from pointer. Means that request is allocated on Arena + TRequestWrapper(TRequest* request) + : storage_(request) + , isPointer_(true) + {} + + // Copy constructor + TRequestWrapper(const TRequestWrapper& other) = default; + + // Move constructor + TRequestWrapper(TRequestWrapper&& other) = default; + + // Copy assignment + TRequestWrapper& operator=(const TRequestWrapper& other) = default; + + // Move assignment + TRequestWrapper& operator=(TRequestWrapper&& other) = default; + + template + void DoRequest( + std::unique_ptr>& serviceConnection, + NYdbGrpc::TAdvancedResponseCallback&& responseCbLow, + typename NYdbGrpc::TSimpleRequestProcessor::TAsyncRequest rpc, + const TCallMeta& meta, + IQueueClientContext* context) + { + if (isPointer_) { + serviceConnection->DoAdvancedRequest(*std::get(storage_), + std::move(responseCbLow), rpc, meta, context); + } else { + serviceConnection->DoAdvancedRequest(std::move(std::get(storage_)), + std::move(responseCbLow), rpc, meta, context); + } + } + + private: + std::variant storage_; + bool isPointer_; + }; + template void Run( - TRequest&& request, + TRequestWrapper&& requestWrapper, TResponseCb&& userResponseCb, TSimpleRpc rpc, TDbDriverStatePtr dbState, const TRpcRequestSettings& requestSettings, std::shared_ptr context = nullptr) { - RunImpl( - std::forward(request), - std::move(userResponseCb), - rpc, - dbState, - requestSettings, - std::move(context) - ); - } + using NYdbGrpc::TGrpcStatus; + using TConnection = std::unique_ptr>; + Y_ABORT_UNLESS(dbState); - template - void RunOnArena( - TRequest* request, - TResponseCb&& userResponseCb, - TSimpleRpc rpc, - TDbDriverStatePtr dbState, - const TRpcRequestSettings& requestSettings, - std::shared_ptr context = nullptr) - { - RunImpl( - request, - std::move(userResponseCb), - rpc, - dbState, - requestSettings, - std::move(context) - ); + if (!TryCreateContext(context)) { + TPlainStatus status(EStatus::CLIENT_CANCELLED, "Client is stopped"); + userResponseCb(nullptr, TPlainStatus{status.Status, std::move(status.Issues)}); + return; + } + + if (dbState->StatCollector.IsCollecting()) { + std::weak_ptr weakState = dbState; + const auto startTime = TInstant::Now(); + userResponseCb = std::move([cb = std::move(userResponseCb), weakState, startTime](TResponse* response, TPlainStatus status) { + const auto resultSize = response ? response->ByteSizeLong() : 0; + cb(response, status); + + if (auto state = weakState.lock()) { + state->StatCollector.IncRequestLatency(TInstant::Now() - startTime); + state->StatCollector.IncResultSize(resultSize); + } + }); + } + + WithServiceConnection( + [this, requestWrapper = std::move(requestWrapper), userResponseCb = std::move(userResponseCb), rpc, + requestSettings, context = std::move(context), dbState] + (TPlainStatus status, TConnection serviceConnection, TEndpointKey endpoint) mutable -> void { + if (!status.Ok()) { + userResponseCb( + nullptr, + std::move(status)); + return; + } + + TCallMeta meta; + meta.Timeout = requestSettings.ClientTimeout; + #ifndef YDB_GRPC_UNSECURE_AUTH + meta.CallCredentials = dbState->CallCredentials; + #else + if (requestSettings.UseAuth && dbState->CredentialsProvider && dbState->CredentialsProvider->IsValid()) { + try { + meta.Aux.push_back({ YDB_AUTH_TICKET_HEADER, GetAuthInfo(dbState) }); + } catch (const std::exception& e) { + userResponseCb( + nullptr, + TPlainStatus( + EStatus::CLIENT_UNAUTHENTICATED, + TStringBuilder() << "Can't get Authentication info from CredentialsProvider. " << e.what() + ) + ); + return; + } + } + #endif + if (!requestSettings.TraceId.empty()) { + meta.Aux.push_back({YDB_TRACE_ID_HEADER, requestSettings.TraceId}); + } + + if (!requestSettings.RequestType.empty()) { + meta.Aux.push_back({YDB_REQUEST_TYPE_HEADER, requestSettings.RequestType}); + } + + if (!dbState->Database.empty()) { + SetDatabaseHeader(meta, dbState->Database); + } + + static const std::string clientPid = GetClientPIDHeaderValue(); + + meta.Aux.push_back({YDB_SDK_BUILD_INFO_HEADER, CreateSDKBuildInfo()}); + meta.Aux.push_back({YDB_CLIENT_PID, clientPid}); + meta.Aux.insert(meta.Aux.end(), requestSettings.Header.begin(), requestSettings.Header.end()); + + dbState->StatCollector.IncGRpcInFlight(); + dbState->StatCollector.IncGRpcInFlightByHost(endpoint.GetEndpoint()); + + NYdbGrpc::TAdvancedResponseCallback responseCbLow = + [this, context, userResponseCb = std::move(userResponseCb), endpoint, dbState] + (const grpc::ClientContext& ctx, TGrpcStatus&& grpcStatus, TResponse&& response) mutable -> void { + dbState->StatCollector.DecGRpcInFlight(); + dbState->StatCollector.DecGRpcInFlightByHost(endpoint.GetEndpoint()); + + if (NYdbGrpc::IsGRpcStatusGood(grpcStatus)) { + std::multimap metadata; + + for (const auto& [name, value] : ctx.GetServerInitialMetadata()) { + metadata.emplace( + std::string(name.begin(), name.end()), + std::string(value.begin(), value.end())); + } + for (const auto& [name, value] : ctx.GetServerTrailingMetadata()) { + metadata.emplace( + std::string(name.begin(), name.end()), + std::string(value.begin(), value.end())); + } + + auto resp = new TResult( + std::move(response), + std::move(grpcStatus), + std::move(userResponseCb), + this, + std::move(context), + endpoint.GetEndpoint(), + std::move(metadata)); + + EnqueueResponse(resp); + } else { + dbState->StatCollector.IncReqFailDueTransportError(); + dbState->StatCollector.IncTransportErrorsByHost(endpoint.GetEndpoint()); + + auto resp = new TGRpcErrorResponse( + std::move(grpcStatus), + std::move(userResponseCb), + this, + std::move(context), + endpoint.GetEndpoint()); + + dbState->EndpointPool.BanEndpoint(endpoint.GetEndpoint()); + + EnqueueResponse(resp); + } + }; + + requestWrapper.DoRequest(serviceConnection, std::move(responseCbLow), rpc, meta, context.get()); + }, dbState, requestSettings.PreferredEndpoint, requestSettings.EndpointPolicy); } template void RunDeferred( - TRequest&& request, + TRequestWrapper&& requestWrapper, TDeferredOperationCb&& userResponseCb, TSimpleRpc rpc, TDbDriverStatePtr dbState, TDuration deferredTimeout, const TRpcRequestSettings& requestSettings, bool poll = false, - std::shared_ptr context = nullptr) { - RunDeferredImpl( - std::forward(request), - std::move(userResponseCb), - rpc, - dbState, - deferredTimeout, - requestSettings, - poll, - std::move(context) - ); - } + std::shared_ptr context = nullptr) + { + if (!TryCreateContext(context)) { + TPlainStatus status(EStatus::CLIENT_CANCELLED, "Client is stopped"); + userResponseCb(nullptr, status); + return; + } - template - void RunDeferredOnArena( - TRequest* request, - TDeferredOperationCb&& userResponseCb, - TSimpleRpc rpc, - TDbDriverStatePtr dbState, - TDuration deferredTimeout, - const TRpcRequestSettings& requestSettings, - bool poll = false, - std::shared_ptr context = nullptr) { - RunDeferredImpl( - request, - std::move(userResponseCb), - rpc, - dbState, - deferredTimeout, - requestSettings, - poll, - std::move(context) - ); - } + auto responseCb = [this, userResponseCb = std::move(userResponseCb), dbState, deferredTimeout, poll, context] + (TResponse* response, TPlainStatus status) mutable + { + if (response) { + Ydb::Operations::Operation* operation = response->mutable_operation(); + if (!operation->ready() && poll) { + auto action = MakeIntrusive( + operation->id(), + std::move(userResponseCb), + this, + std::move(context), + deferredTimeout, + dbState, + status.Endpoint); - // Run request using discovery endpoint. - // Mostly usefull to make calls from credential provider - template - static void RunOnDiscoveryEndpoint( - std::shared_ptr facility, - TRequest&& request, - TResponseCb&& responseCb, - TSimpleRpc rpc, - TRpcRequestSettings requestSettings) - { - requestSettings.EndpointPolicy = TRpcRequestSettings::TEndpointPolicy::UseDiscoveryEndpoint; - requestSettings.UseAuth = false; - // TODO: Change implementation of Run, to use ICoreFacility and remove this cast - auto dbState = std::dynamic_pointer_cast(facility); - Y_ABORT_UNLESS(dbState); - auto self = dynamic_cast(dbState->Client); - Y_ABORT_UNLESS(self); - self->Run( - std::move(request), - std::move(responseCb), + action->Start(); + } else { + NYdb::NIssue::TIssues opIssues; + NYdb::NIssue::IssuesFromMessage(operation->issues(), opIssues); + userResponseCb(operation, TPlainStatus{static_cast(operation->status()), std::move(opIssues), + status.Endpoint, std::move(status.Metadata)}); + } + } else { + userResponseCb(nullptr, status); + } + }; + + Run( + std::move(requestWrapper), + responseCb, rpc, dbState, requestSettings, - nullptr); + std::move(context)); } template void RunDeferred( - TRequest&& request, + TRequestWrapper&& requestWrapper, TDeferredResultCb&& userResponseCb, TSimpleRpc rpc, TDbDriverStatePtr dbState, @@ -268,7 +398,7 @@ class TGRpcConnectionsImpl }; RunDeferred( - std::forward(request), + std::move(requestWrapper), operationCb, rpc, dbState, @@ -278,34 +408,30 @@ class TGRpcConnectionsImpl context); } -template - void RunDeferredOnArena( - TRequest* request, - TDeferredResultCb&& userResponseCb, + // Run request using discovery endpoint. + // Mostly usefull to make calls from credential provider + template + static void RunOnDiscoveryEndpoint( + std::shared_ptr facility, + TRequest&& request, + TResponseCb&& responseCb, TSimpleRpc rpc, - TDbDriverStatePtr dbState, - TDuration deferredTimeout, - const TRpcRequestSettings& requestSettings, - std::shared_ptr context = nullptr) + TRpcRequestSettings requestSettings) { - auto operationCb = [userResponseCb = std::move(userResponseCb)](Ydb::Operations::Operation* operation, TPlainStatus status) mutable { - if (operation) { - status.SetCostInfo(std::move(*operation->mutable_cost_info())); - userResponseCb(operation->mutable_result(), std::move(status)); - } else { - userResponseCb(nullptr, std::move(status)); - } - }; - - RunDeferredOnArena( - request, - operationCb, + requestSettings.EndpointPolicy = TRpcRequestSettings::TEndpointPolicy::UseDiscoveryEndpoint; + requestSettings.UseAuth = false; + // TODO: Change implementation of Run, to use ICoreFacility and remove this cast + auto dbState = std::dynamic_pointer_cast(facility); + Y_ABORT_UNLESS(dbState); + auto self = dynamic_cast(dbState->Client); + Y_ABORT_UNLESS(self); + self->Run( + std::move(request), + std::move(responseCb), rpc, dbState, - deferredTimeout, requestSettings, - true, // poll - context); + nullptr); } template class TStream> @@ -529,249 +655,6 @@ template const TLog& GetLog() const override; private: - template - void RunImpl( - std::conditional_t request, - TResponseCb&& userResponseCb, - TSimpleRpc rpc, - TDbDriverStatePtr dbState, - const TRpcRequestSettings& requestSettings, - std::shared_ptr context = nullptr) - { - using NYdbGrpc::TGrpcStatus; - using TConnection = std::unique_ptr>; - Y_ABORT_UNLESS(dbState); - - if (!TryCreateContext(context)) { - TPlainStatus status(EStatus::CLIENT_CANCELLED, "Client is stopped"); - userResponseCb(nullptr, TPlainStatus{status.Status, std::move(status.Issues)}); - return; - } - - if (dbState->StatCollector.IsCollecting()) { - std::weak_ptr weakState = dbState; - const auto startTime = TInstant::Now(); - userResponseCb = std::move([cb = std::move(userResponseCb), weakState, startTime](TResponse* response, TPlainStatus status) { - const auto resultSize = response ? response->ByteSizeLong() : 0; - cb(response, status); - - if (auto state = weakState.lock()) { - state->StatCollector.IncRequestLatency(TInstant::Now() - startTime); - state->StatCollector.IncResultSize(resultSize); - } - }); - } - - auto createMeta = [requestSettings, dbState](const TResponseCb& userResponseCb) -> std::optional { - TCallMeta meta; - meta.Timeout = requestSettings.ClientTimeout; - #ifndef YDB_GRPC_UNSECURE_AUTH - meta.CallCredentials = dbState->CallCredentials; - #else - if (requestSettings.UseAuth && dbState->CredentialsProvider && dbState->CredentialsProvider->IsValid()) { - try { - meta.Aux.push_back({ YDB_AUTH_TICKET_HEADER, GetAuthInfo(dbState) }); - } catch (const std::exception& e) { - userResponseCb( - nullptr, - TPlainStatus( - EStatus::CLIENT_UNAUTHENTICATED, - TStringBuilder() << "Can't get Authentication info from CredentialsProvider. " << e.what() - ) - ); - return std::nullopt; - } - } - #endif - if (!requestSettings.TraceId.empty()) { - meta.Aux.push_back({YDB_TRACE_ID_HEADER, requestSettings.TraceId}); - } - - if (!requestSettings.RequestType.empty()) { - meta.Aux.push_back({YDB_REQUEST_TYPE_HEADER, requestSettings.RequestType}); - } - - if (!dbState->Database.empty()) { - SetDatabaseHeader(meta, dbState->Database); - } - - static const std::string clientPid = GetClientPIDHeaderValue(); - - meta.Aux.push_back({YDB_SDK_BUILD_INFO_HEADER, CreateSDKBuildInfo()}); - meta.Aux.push_back({YDB_CLIENT_PID, clientPid}); - meta.Aux.insert(meta.Aux.end(), requestSettings.Header.begin(), requestSettings.Header.end()); - - return meta; - }; - - auto createResponseCbLow = [this, dbState]( - const TResponseCb& userResponseCb, - const TEndpointKey& endpoint, - std::shared_ptr context) -> NYdbGrpc::TAdvancedResponseCallback { - dbState->StatCollector.IncGRpcInFlight(); - dbState->StatCollector.IncGRpcInFlightByHost(endpoint.GetEndpoint()); - - NYdbGrpc::TAdvancedResponseCallback responseCbLow = - [this, context, userResponseCb = std::move(userResponseCb), endpoint, dbState] - (const grpc::ClientContext& ctx, TGrpcStatus&& grpcStatus, TResponse&& response) mutable -> void { - dbState->StatCollector.DecGRpcInFlight(); - dbState->StatCollector.DecGRpcInFlightByHost(endpoint.GetEndpoint()); - - if (NYdbGrpc::IsGRpcStatusGood(grpcStatus)) { - std::multimap metadata; - - for (const auto& [name, value] : ctx.GetServerInitialMetadata()) { - metadata.emplace( - std::string(name.begin(), name.end()), - std::string(value.begin(), value.end())); - } - for (const auto& [name, value] : ctx.GetServerTrailingMetadata()) { - metadata.emplace( - std::string(name.begin(), name.end()), - std::string(value.begin(), value.end())); - } - - auto resp = new TResult( - std::move(response), - std::move(grpcStatus), - std::move(userResponseCb), - this, - std::move(context), - endpoint.GetEndpoint(), - std::move(metadata)); - - EnqueueResponse(resp); - } else { - dbState->StatCollector.IncReqFailDueTransportError(); - dbState->StatCollector.IncTransportErrorsByHost(endpoint.GetEndpoint()); - - auto resp = new TGRpcErrorResponse( - std::move(grpcStatus), - std::move(userResponseCb), - this, - std::move(context), - endpoint.GetEndpoint()); - - dbState->EndpointPool.BanEndpoint(endpoint.GetEndpoint()); - - EnqueueResponse(resp); - } - }; - - return responseCbLow; - }; - - if constexpr (RequestOnArena) { - WithServiceConnection( - [request, userResponseCb = std::move(userResponseCb), rpc, requestSettings, context = std::move(context), dbState, - createMeta = std::move(createMeta), createResponseCbLow = std::move(createResponseCbLow)] - (TPlainStatus status, TConnection serviceConnection, TEndpointKey endpoint) mutable -> void { - if (!status.Ok()) { - userResponseCb( - nullptr, - std::move(status)); - return; - } - - std::optional metaOpt = createMeta(userResponseCb); - if (!metaOpt) { - return; - } - auto meta = std::move(*metaOpt); - auto responseCbLow = createResponseCbLow(userResponseCb, endpoint, context); - - serviceConnection->DoAdvancedRequest(*request, std::move(responseCbLow), rpc, meta, context.get()); - }, dbState, requestSettings.PreferredEndpoint, requestSettings.EndpointPolicy - ); - } - else { - WithServiceConnection( - [request, userResponseCb = std::move(userResponseCb), rpc, requestSettings, context = std::move(context), dbState, - createMeta = std::move(createMeta), createResponseCbLow = std::move(createResponseCbLow)] - (TPlainStatus status, TConnection serviceConnection, TEndpointKey endpoint) mutable -> void { - if (!status.Ok()) { - userResponseCb( - nullptr, - std::move(status)); - return; - } - - std::optional metaOpt = createMeta(userResponseCb); - if (!metaOpt) { - return; - } - auto meta = std::move(*metaOpt); - auto responseCbLow = createResponseCbLow(userResponseCb, endpoint, context); - - serviceConnection->DoAdvancedRequest(std::move(request), std::move(responseCbLow), rpc, meta, context.get()); - }, dbState, requestSettings.PreferredEndpoint, requestSettings.EndpointPolicy - ); - } - } - - template - void RunDeferredImpl( - std::conditional_t request, - TDeferredOperationCb&& userResponseCb, - TSimpleRpc rpc, - TDbDriverStatePtr dbState, - TDuration deferredTimeout, - const TRpcRequestSettings& requestSettings, - bool poll = false, - std::shared_ptr context = nullptr) { - if (!TryCreateContext(context)) { - TPlainStatus status(EStatus::CLIENT_CANCELLED, "Client is stopped"); - userResponseCb(nullptr, status); - return; - } - - auto responseCb = [this, userResponseCb = std::move(userResponseCb), dbState, deferredTimeout, poll, context] - (TResponse* response, TPlainStatus status) mutable - { - if (response) { - Ydb::Operations::Operation* operation = response->mutable_operation(); - if (!operation->ready() && poll) { - auto action = MakeIntrusive( - operation->id(), - std::move(userResponseCb), - this, - std::move(context), - deferredTimeout, - dbState, - status.Endpoint); - - action->Start(); - } else { - NYdb::NIssue::TIssues opIssues; - NYdb::NIssue::IssuesFromMessage(operation->issues(), opIssues); - userResponseCb(operation, TPlainStatus{static_cast(operation->status()), std::move(opIssues), - status.Endpoint, std::move(status.Metadata)}); - } - } else { - userResponseCb(nullptr, status); - } - }; - - if constexpr (RequestOnArena) { - RunOnArena( - request, - responseCb, - rpc, - dbState, - requestSettings, - std::move(context)); - } - else { - Run( - std::forward(request), - responseCb, - rpc, - dbState, - requestSettings, - std::move(context)); - } - } - template void WithServiceConnection(TCallback callback, TDbDriverStatePtr dbState, const TEndpointKey& preferredEndpoint, TRpcRequestSettings::TEndpointPolicy endpointPolicy) diff --git a/ydb/public/sdk/cpp/src/client/table/impl/table_client.cpp b/ydb/public/sdk/cpp/src/client/table/impl/table_client.cpp index 29dedd132cff..160c3cf0b225 100644 --- a/ydb/public/sdk/cpp/src/client/table/impl/table_client.cpp +++ b/ydb/public/sdk/cpp/src/client/table/impl/table_client.cpp @@ -1017,7 +1017,7 @@ TAsyncBulkUpsertResult TTableClient::TImpl::BulkUpsert(const std::string& table, }; if (settings.Arena_) { - Connections_->RunDeferredOnArena( + Connections_->RunDeferred( request, extractor, &Ydb::Table::V1::TableService::Stub::AsyncBulkUpsert, From 37e57dd9ee5d01e8de6fc1e2d2dbf9c0effb1b7a Mon Sep 17 00:00:00 2001 From: Nikolay Perfilov Date: Mon, 23 Jun 2025 20:01:17 +0300 Subject: [PATCH 38/41] Fix naming --- .../grpc_connections/grpc_connections.h | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/ydb/public/sdk/cpp/src/client/impl/ydb_internal/grpc_connections/grpc_connections.h b/ydb/public/sdk/cpp/src/client/impl/ydb_internal/grpc_connections/grpc_connections.h index d617a2422d12..7eb3dd953f77 100644 --- a/ydb/public/sdk/cpp/src/client/impl/ydb_internal/grpc_connections/grpc_connections.h +++ b/ydb/public/sdk/cpp/src/client/impl/ydb_internal/grpc_connections/grpc_connections.h @@ -145,14 +145,14 @@ class TGRpcConnectionsImpl public: // Implicit conversion from rvalue reference TRequestWrapper(TRequest&& request) - : storage_(std::move(request)) - , isPointer_(false) + : Storage_(std::move(request)) + , IsPointer_(false) {} // Implicit conversion from pointer. Means that request is allocated on Arena TRequestWrapper(TRequest* request) - : storage_(request) - , isPointer_(true) + : Storage_(request) + , IsPointer_(true) {} // Copy constructor @@ -175,18 +175,18 @@ class TGRpcConnectionsImpl const TCallMeta& meta, IQueueClientContext* context) { - if (isPointer_) { - serviceConnection->DoAdvancedRequest(*std::get(storage_), + if (IsPointer_) { + serviceConnection->DoAdvancedRequest(*std::get(Storage_), std::move(responseCbLow), rpc, meta, context); } else { - serviceConnection->DoAdvancedRequest(std::move(std::get(storage_)), + serviceConnection->DoAdvancedRequest(std::move(std::get(Storage_)), std::move(responseCbLow), rpc, meta, context); } } private: - std::variant storage_; - bool isPointer_; + std::variant Storage_; + bool IsPointer_; }; template From 9dcee4ad9c07118063461f03bd32a75d58b5b296 Mon Sep 17 00:00:00 2001 From: Nikolay Perfilov Date: Mon, 23 Jun 2025 20:05:19 +0300 Subject: [PATCH 39/41] Fix method order --- .../grpc_connections/grpc_connections.h | 52 +++++++++---------- 1 file changed, 26 insertions(+), 26 deletions(-) diff --git a/ydb/public/sdk/cpp/src/client/impl/ydb_internal/grpc_connections/grpc_connections.h b/ydb/public/sdk/cpp/src/client/impl/ydb_internal/grpc_connections/grpc_connections.h index 7eb3dd953f77..b06854f9cafd 100644 --- a/ydb/public/sdk/cpp/src/client/impl/ydb_internal/grpc_connections/grpc_connections.h +++ b/ydb/public/sdk/cpp/src/client/impl/ydb_internal/grpc_connections/grpc_connections.h @@ -325,6 +325,32 @@ class TGRpcConnectionsImpl }, dbState, requestSettings.PreferredEndpoint, requestSettings.EndpointPolicy); } + // Run request using discovery endpoint. + // Mostly usefull to make calls from credential provider + template + static void RunOnDiscoveryEndpoint( + std::shared_ptr facility, + TRequest&& request, + TResponseCb&& responseCb, + TSimpleRpc rpc, + TRpcRequestSettings requestSettings) + { + requestSettings.EndpointPolicy = TRpcRequestSettings::TEndpointPolicy::UseDiscoveryEndpoint; + requestSettings.UseAuth = false; + // TODO: Change implementation of Run, to use ICoreFacility and remove this cast + auto dbState = std::dynamic_pointer_cast(facility); + Y_ABORT_UNLESS(dbState); + auto self = dynamic_cast(dbState->Client); + Y_ABORT_UNLESS(self); + self->Run( + std::move(request), + std::move(responseCb), + rpc, + dbState, + requestSettings, + nullptr); + } + template void RunDeferred( TRequestWrapper&& requestWrapper, @@ -408,32 +434,6 @@ class TGRpcConnectionsImpl context); } - // Run request using discovery endpoint. - // Mostly usefull to make calls from credential provider - template - static void RunOnDiscoveryEndpoint( - std::shared_ptr facility, - TRequest&& request, - TResponseCb&& responseCb, - TSimpleRpc rpc, - TRpcRequestSettings requestSettings) - { - requestSettings.EndpointPolicy = TRpcRequestSettings::TEndpointPolicy::UseDiscoveryEndpoint; - requestSettings.UseAuth = false; - // TODO: Change implementation of Run, to use ICoreFacility and remove this cast - auto dbState = std::dynamic_pointer_cast(facility); - Y_ABORT_UNLESS(dbState); - auto self = dynamic_cast(dbState->Client); - Y_ABORT_UNLESS(self); - self->Run( - std::move(request), - std::move(responseCb), - rpc, - dbState, - requestSettings, - nullptr); - } - template class TStream> using TStreamRpc = typename TStream< From 75787eea3e1a3b214be162695eb4878869b2814e Mon Sep 17 00:00:00 2001 From: Nikolay Perfilov Date: Mon, 23 Jun 2025 20:09:00 +0300 Subject: [PATCH 40/41] Fix method order 2 --- .../grpc_connections/grpc_connections.h | 52 +++++++++---------- 1 file changed, 26 insertions(+), 26 deletions(-) diff --git a/ydb/public/sdk/cpp/src/client/impl/ydb_internal/grpc_connections/grpc_connections.h b/ydb/public/sdk/cpp/src/client/impl/ydb_internal/grpc_connections/grpc_connections.h index b06854f9cafd..6fedd21f630a 100644 --- a/ydb/public/sdk/cpp/src/client/impl/ydb_internal/grpc_connections/grpc_connections.h +++ b/ydb/public/sdk/cpp/src/client/impl/ydb_internal/grpc_connections/grpc_connections.h @@ -325,32 +325,6 @@ class TGRpcConnectionsImpl }, dbState, requestSettings.PreferredEndpoint, requestSettings.EndpointPolicy); } - // Run request using discovery endpoint. - // Mostly usefull to make calls from credential provider - template - static void RunOnDiscoveryEndpoint( - std::shared_ptr facility, - TRequest&& request, - TResponseCb&& responseCb, - TSimpleRpc rpc, - TRpcRequestSettings requestSettings) - { - requestSettings.EndpointPolicy = TRpcRequestSettings::TEndpointPolicy::UseDiscoveryEndpoint; - requestSettings.UseAuth = false; - // TODO: Change implementation of Run, to use ICoreFacility and remove this cast - auto dbState = std::dynamic_pointer_cast(facility); - Y_ABORT_UNLESS(dbState); - auto self = dynamic_cast(dbState->Client); - Y_ABORT_UNLESS(self); - self->Run( - std::move(request), - std::move(responseCb), - rpc, - dbState, - requestSettings, - nullptr); - } - template void RunDeferred( TRequestWrapper&& requestWrapper, @@ -404,6 +378,32 @@ class TGRpcConnectionsImpl std::move(context)); } + // Run request using discovery endpoint. + // Mostly usefull to make calls from credential provider + template + static void RunOnDiscoveryEndpoint( + std::shared_ptr facility, + TRequest&& request, + TResponseCb&& responseCb, + TSimpleRpc rpc, + TRpcRequestSettings requestSettings) + { + requestSettings.EndpointPolicy = TRpcRequestSettings::TEndpointPolicy::UseDiscoveryEndpoint; + requestSettings.UseAuth = false; + // TODO: Change implementation of Run, to use ICoreFacility and remove this cast + auto dbState = std::dynamic_pointer_cast(facility); + Y_ABORT_UNLESS(dbState); + auto self = dynamic_cast(dbState->Client); + Y_ABORT_UNLESS(self); + self->Run( + std::move(request), + std::move(responseCb), + rpc, + dbState, + requestSettings, + nullptr); + } + template void RunDeferred( TRequestWrapper&& requestWrapper, From b383743d93fa655c72494d3626d85a191b3adb27 Mon Sep 17 00:00:00 2001 From: Nikolay Perfilov Date: Mon, 23 Jun 2025 20:41:57 +0300 Subject: [PATCH 41/41] Remove redundant member --- .../impl/ydb_internal/grpc_connections/grpc_connections.h | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/ydb/public/sdk/cpp/src/client/impl/ydb_internal/grpc_connections/grpc_connections.h b/ydb/public/sdk/cpp/src/client/impl/ydb_internal/grpc_connections/grpc_connections.h index 6fedd21f630a..d65d408bf022 100644 --- a/ydb/public/sdk/cpp/src/client/impl/ydb_internal/grpc_connections/grpc_connections.h +++ b/ydb/public/sdk/cpp/src/client/impl/ydb_internal/grpc_connections/grpc_connections.h @@ -146,13 +146,11 @@ class TGRpcConnectionsImpl // Implicit conversion from rvalue reference TRequestWrapper(TRequest&& request) : Storage_(std::move(request)) - , IsPointer_(false) {} // Implicit conversion from pointer. Means that request is allocated on Arena TRequestWrapper(TRequest* request) : Storage_(request) - , IsPointer_(true) {} // Copy constructor @@ -175,8 +173,8 @@ class TGRpcConnectionsImpl const TCallMeta& meta, IQueueClientContext* context) { - if (IsPointer_) { - serviceConnection->DoAdvancedRequest(*std::get(Storage_), + if (auto ptr = std::get_if(&Storage_)) { + serviceConnection->DoAdvancedRequest(**ptr, std::move(responseCbLow), rpc, meta, context); } else { serviceConnection->DoAdvancedRequest(std::move(std::get(Storage_)), @@ -186,7 +184,6 @@ class TGRpcConnectionsImpl private: std::variant Storage_; - bool IsPointer_; }; template